parse.c 42.2 KB
Newer Older
1
2
3
4
5
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
6
 * Copyright (c) 2013-2015 Damien P. George
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <string.h>
Damien's avatar
Damien committed
32

33
#include "py/nlr.h"
34
35
36
37
#include "py/lexer.h"
#include "py/parse.h"
#include "py/parsenum.h"
#include "py/smallint.h"
38
39
#include "py/runtime.h"
#include "py/builtin.h"
Damien's avatar
Damien committed
40

41
42
#if MICROPY_ENABLE_COMPILER

Damien's avatar
Damien committed
43
// Layout of the `act` byte of a rule:
//   bits 0-3: argument of the action (see RULE_ACT_ARG_MASK)
//   bits 4-5: kind of the action (OR / AND / LIST)
//   bit 6:    RULE_ACT_ALLOW_IDENT flag
//   bit 7:    RULE_ACT_ADD_BLANK flag
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_KIND_MASK      (0x30)
#define RULE_ACT_ALLOW_IDENT    (0x40)
#define RULE_ACT_ADD_BLANK      (0x80)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Layout of each 16-bit rule argument:
//   bits 12-15: kind of the argument (token / rule / optional rule)
//   bits 0-11:  token id or rule id
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_RULE       (0x3000)

// Evaluates to true if the given rule (a pointer to a rule) has the
// RULE_ACT_ADD_BLANK flag set.  The argument is parenthesised so that any
// pointer expression can be passed, not only a plain identifier.
#define ADD_BLANK_NODE(rule) ((((rule)->act) & RULE_ACT_ADD_BLANK) != 0)
58

Damien's avatar
Damien committed
59
60
61
62
63
64
65
66
67
68
69
70
71
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of a single grammar rule.
typedef struct _rule_t {
    byte rule_id; // RULE_xxx identifier of this rule
    byte act; // action byte, decoded with the RULE_ACT_* masks and flags
#ifdef USE_RULE_NAME
    const char *rule_name; // name of the rule as a string; debugging only
#endif
    uint16_t arg[]; // arguments of the rule, decoded with the RULE_ARG_* masks
} rule_t;

// Numeric ids of all parse node kinds: one RULE_<name> entry per DEF_RULE in
// py/grammar.h (in the order they appear there), followed by the ids of the
// special nodes that do not correspond to a grammar rule.
enum {
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
#include "py/grammar.h"
#undef DEF_RULE
    RULE_maximum_number_of, // first id after the last grammar rule
    RULE_string, // special node for non-interned string
    RULE_bytes, // special node for non-interned bytes
    RULE_const_object, // special node for a constant, generic Python object
};

81
82
// Short-hand macros used by the entries of py/grammar.h.  They are only
// meant to be active while grammar.h is included below to emit one static
// rule_t object (named rule_<name>) per grammar rule.
// NOTE(review): `ident` and `blank` are not #undef'd afterwards, unlike the
// other helpers; left as-is here in case later code relies on them.
#define ident                   (RULE_ACT_ALLOW_IDENT)
#define blank                   (RULE_ACT_ADD_BLANK)
#define or(n)                   (RULE_ACT_OR | (n))
#define and(n)                  (RULE_ACT_AND | (n))
#define one_or_more             (RULE_ACT_LIST | 2)
#define list                    (RULE_ACT_LIST | 1)
#define list_with_end           (RULE_ACT_LIST | 3)
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
#ifdef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#endif
#include "py/grammar.h"
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_rule
#undef one_or_more
#undef DEF_RULE

107
// Table of all grammar rules, indexed by rule id (RULE_xxx).  Both the rule
// objects and the table of pointers are constant, so the whole table can be
// placed in read-only memory.
STATIC const rule_t *const rules[] = {
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
#include "py/grammar.h"
#undef DEF_RULE
};

// One entry of the parser's rule stack: a rule that is partially processed,
// together with the position reached within it.
typedef struct _rule_stack_t {
    // source line number; it gets all bits of a size_t except the 8 used by rule_id
    size_t src_line : 8 * sizeof(size_t) - 8;
    // id of the rule; 8 bits must be enough to hold the largest rule number
    size_t rule_id : 8;
    // position within the rule; its width limits the number of nodes in a "list" of things
    size_t arg_i;
} rule_stack_t;

119
typedef struct _mp_parse_chunk_t {
120
    size_t alloc;
121
    union {
122
        size_t used;
123
124
125
126
127
        struct _mp_parse_chunk_t *next;
    } union_;
    byte data[];
} mp_parse_chunk_t;

128
129
130
131
132
133
// Error state recorded in parser_t.parse_error.
typedef enum {
    PARSE_ERROR_NONE = 0, // no error; zero so the state can be tested as a boolean
    PARSE_ERROR_MEMORY, // a memory allocation needed by the parser failed
    PARSE_ERROR_CONST, // the value in `id = const(value)` was not a small integer
} parse_error_t;

Damien's avatar
Damien committed
134
typedef struct _parser_t {
135
    parse_error_t parse_error;
136

137
138
    size_t rule_stack_alloc;
    size_t rule_stack_top;
Damien's avatar
Damien committed
139
140
    rule_stack_t *rule_stack;

141
142
    size_t result_stack_alloc;
    size_t result_stack_top;
143
    mp_parse_node_t *result_stack;
144
145

    mp_lexer_t *lexer;
146
147
148

    mp_parse_tree_t tree;
    mp_parse_chunk_t *cur_chunk;
Damien's avatar
Damien committed
149

150
151
152
153
    #if MICROPY_COMP_CONST
    mp_map_t consts;
    #endif
} parser_t;
154

155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Allocate num_bytes of storage for a parse node and return a pointer to it.
//
// Parse nodes are stored sequentially in large chunks.  The request is served
// from the end of the current chunk when it fits; otherwise the chunk is grown
// in place, and if that is not possible it is trimmed to its used size, linked
// into parser->tree.chunk, and a fresh chunk is started.
//
// Chunks must never move in memory once nodes have been handed out from them
// (those nodes are referenced by address), so every resize of an existing
// chunk is done with m_renew_maybe() and its last argument set to false.
STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
    mp_parse_chunk_t *chunk = parser->cur_chunk;

    if (chunk != NULL && chunk->union_.used + num_bytes > chunk->alloc) {
        // not enough room at end of previously allocated chunk so try to grow
        mp_parse_chunk_t *new_data = (mp_parse_chunk_t*)m_renew_maybe(byte, chunk,
            sizeof(mp_parse_chunk_t) + chunk->alloc,
            sizeof(mp_parse_chunk_t) + chunk->alloc + num_bytes, false);
        if (new_data == NULL) {
            // could not grow existing memory; shrink it to fit previous.
            // The shrink must not relocate the chunk (its address is kept and
            // used below) and must not raise, so use the non-moving "maybe"
            // variant; if the shrink itself fails the chunk simply keeps its
            // old, larger allocation.
            (void)m_renew_maybe(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc,
                sizeof(mp_parse_chunk_t) + chunk->union_.used, false);
            chunk->alloc = chunk->union_.used;
            // the chunk is finished: from now on the union holds the list link
            chunk->union_.next = parser->tree.chunk;
            parser->tree.chunk = chunk;
            chunk = NULL;
        } else {
            // could grow existing memory
            chunk->alloc += num_bytes;
        }
    }

    if (chunk == NULL) {
        // no previous chunk, allocate a new chunk that is at least big enough
        // for this request
        size_t alloc = MICROPY_ALLOC_PARSE_CHUNK_INIT;
        if (alloc < num_bytes) {
            alloc = num_bytes;
        }
        chunk = (mp_parse_chunk_t*)m_new(byte, sizeof(mp_parse_chunk_t) + alloc);
        chunk->alloc = alloc;
        chunk->union_.used = 0;
        parser->cur_chunk = chunk;
    }

    // hand out the next num_bytes of the current chunk
    byte *ret = chunk->data + chunk->union_.used;
    chunk->union_.used += num_bytes;
    return ret;
}

196
// Push an entry (rule, position arg_i within it, source line) onto the rule
// stack, enlarging the stack by MICROPY_ALLOC_PARSE_RULE_INC entries when it
// is full.  Does nothing if an error is already recorded; records
// PARSE_ERROR_MEMORY and leaves the stack unchanged if it cannot be enlarged.
STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
    if (parser->parse_error) {
        return;
    }

    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        size_t new_alloc = parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC;
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, new_alloc, true);
        if (grown == NULL) {
            parser->parse_error = PARSE_ERROR_MEMORY;
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc = new_alloc;
    }

    rule_stack_t *entry = parser->rule_stack + parser->rule_stack_top;
    parser->rule_stack_top += 1;
    entry->src_line = src_line;
    entry->rule_id = rule->rule_id;
    entry->arg_i = arg_i;
}

215
// Push the rule referenced by a rule argument, starting at position 0 and
// tagged with the line of the lexer's current token.  The argument must be of
// kind RULE_ARG_RULE or RULE_ARG_OPT_RULE.
STATIC void push_rule_from_arg(parser_t *parser, size_t arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
    size_t id = arg & RULE_ARG_ARG_MASK;
    assert(id < RULE_maximum_number_of);
    const rule_t *target = rules[id];
    push_rule(parser, parser->lexer->tok_line, target, 0);
}

222
// Remove the top entry of the rule stack and return its contents through the
// three output parameters.  Must not be called once an error is recorded.
STATIC void pop_rule(parser_t *parser, const rule_t **rule, size_t *arg_i, size_t *src_line) {
    assert(!parser->parse_error);
    const rule_stack_t *top = &parser->rule_stack[--parser->rule_stack_top];
    *rule = rules[top->rule_id];
    *arg_i = top->arg_i;
    *src_line = top->src_line;
}

230
// Create a leaf parse node of the given kind holding the value arg.
// Small-int leaves store the value shifted left by 1 bit, all other leaves
// shifted left by 4 bits; the kind occupies the low bits.
//
// The shift is done on an unsigned value because left-shifting a negative
// signed integer is undefined behaviour in C, and arg is negative for
// negative small ints.  The resulting bit pattern is the two's complement
// one that the signed shift was relied upon to produce.
mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg) {
    if (kind == MP_PARSE_NODE_SMALL_INT) {
        return (mp_parse_node_t)(kind | ((uintptr_t)arg << 1));
    }
    return (mp_parse_node_t)(kind | ((uintptr_t)arg << 4));
}

237
// View the node *pn as a list of nodes and return the number of items.
//  - a null node gives an empty list (*nodes is set to NULL, 0 is returned);
//  - a struct node of kind pn_kind gives its children (*nodes points at them);
//  - anything else (a leaf, or a struct of another kind) is treated as a list
//    of one item, namely the node itself (*nodes is set to pn).
int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) {
    if (MP_PARSE_NODE_IS_NULL(*pn)) {
        *nodes = NULL;
        return 0;
    }

    if (!MP_PARSE_NODE_IS_LEAF(*pn)) {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn);
        if (MP_PARSE_NODE_STRUCT_KIND(pns) == pn_kind) {
            *nodes = pns->nodes;
            return MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        }
    }

    // single item
    *nodes = pn;
    return 1;
}

256
#if MICROPY_DEBUG_PRINTERS
// Print a parse node, and recursively its children, to stdout for debugging.
// Each line starts with the source line of the node (struct nodes only, blank
// otherwise) followed by `indent` spaces; children are indented by 2 more.
void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
    // line-number column
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }

    // indentation
    for (size_t col = 0; col < indent; col++) {
        printf(" ");
    }

    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
        return;
    }

    if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
        return;
    }

    if (MP_PARSE_NODE_IS_LEAF(pn)) {
        uintptr_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_TOKEN: printf("tok(%u)\n", (uint)arg); break;
            default: assert(0);
        }
        return;
    }

    // node must be a mp_parse_node_struct_t
    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
    switch (MP_PARSE_NODE_STRUCT_KIND(pns)) {
        case RULE_string:
            // nodes[0] is the pointer to the data, nodes[1] its length
            printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
            break;

        case RULE_bytes:
            printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
            break;

        case RULE_const_object:
            #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
            // the 64-bit object is split over two nodes
            printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
            #else
            printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
            #endif
            break;

        default: {
            // a normal rule: print its header and then all its children
            size_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
            printf("%s(%u) (n=%u)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n);
#else
            printf("rule(%u) (n=%u)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n);
#endif
            for (size_t child = 0; child < n; child++) {
                mp_parse_node_print(pns->nodes[child], indent + 2);
            }
            break;
        }
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
307
308

/*
309
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
310
    printf("result stack, most recent first\n");
311
    for (ssize_t i = parser->result_stack_top - 1; i >= 0; i--) {
312
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
313
314
315
316
    }
}
*/

317
// Remove and return the top node of the result stack.
// Returns MP_PARSE_NODE_NULL, without touching the stack, if an error is
// already recorded.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    if (parser->parse_error) {
        return MP_PARSE_NODE_NULL;
    }
    assert(parser->result_stack_top > 0);
    parser->result_stack_top -= 1;
    return parser->result_stack[parser->result_stack_top];
}

325
// Return, without removing it, the node that is `pos` entries below the top
// of the result stack (pos == 0 is the top itself).
// Returns MP_PARSE_NODE_NULL if an error is already recorded.
STATIC mp_parse_node_t peek_result(parser_t *parser, size_t pos) {
    if (parser->parse_error) {
        return MP_PARSE_NODE_NULL;
    }
    assert(parser->result_stack_top > pos);
    size_t index = parser->result_stack_top - 1 - pos;
    return parser->result_stack[index];
}

333
// Push a node onto the result stack, enlarging the stack by
// MICROPY_ALLOC_PARSE_RESULT_INC entries when it is full.  Does nothing if an
// error is already recorded; records PARSE_ERROR_MEMORY and drops the node if
// the stack cannot be enlarged.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->parse_error) {
        return;
    }

    if (parser->result_stack_top >= parser->result_stack_alloc) {
        size_t new_alloc = parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC;
        mp_parse_node_t *grown = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, new_alloc, true);
        if (grown == NULL) {
            parser->parse_error = PARSE_ERROR_MEMORY;
            return;
        }
        parser->result_stack = grown;
        parser->result_stack_alloc = new_alloc;
    }

    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}

349
// Create a struct node of kind rule_kind that holds a private copy of the
// len bytes at str.  The node has two entries: nodes[0] is the pointer to the
// copy and nodes[1] is its length.  If the node cannot be allocated,
// PARSE_ERROR_MEMORY is recorded and MP_PARSE_NODE_NULL is returned.
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) {
    const size_t node_size = sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2;
    mp_parse_node_struct_t *node = parser_alloc(parser, node_size);
    if (node == NULL) {
        parser->parse_error = PARSE_ERROR_MEMORY;
        return MP_PARSE_NODE_NULL;
    }

    node->source_line = src_line;
    node->kind_num_nodes = rule_kind | (2 << 8);

    // the data is copied, so the caller's buffer does not have to stay alive
    char *copy = m_new(char, len);
    memcpy(copy, str, len);
    node->nodes[0] = (uintptr_t)copy;
    node->nodes[1] = len;

    return (mp_parse_node_t)node;
}

364
// Create a RULE_const_object struct node that holds the object obj.
// If the node cannot be allocated, PARSE_ERROR_MEMORY is recorded and
// MP_PARSE_NODE_NULL is returned.
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
    mp_parse_node_struct_t *node = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
    if (node == NULL) {
        parser->parse_error = PARSE_ERROR_MEMORY;
        return MP_PARSE_NODE_NULL;
    }

    node->source_line = src_line;

    #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
    // nodes are 32-bit pointers, but need to store 64-bit object:
    // low half goes in nodes[0], high half in nodes[1]
    node->kind_num_nodes = RULE_const_object | (2 << 8);
    node->nodes[0] = (uint64_t)obj;
    node->nodes[1] = (uint64_t)obj >> 32;
    #else
    // the object fits in a single node
    node->kind_num_nodes = RULE_const_object | (1 << 8);
    node->nodes[0] = (uintptr_t)obj;
    #endif

    return (mp_parse_node_t)node;
}
382

383
// Convert the lexer's current token to a parse node and push it on the
// result stack:
//  - names become id leaves, or small-int leaves if the name is a known
//    dynamic constant (MICROPY_COMP_CONST);
//  - integers become small-int leaves or const-object nodes;
//  - floats/imaginaries become const-object nodes;
//  - strings/bytes become qstr leaves or nodes holding a copy of the data;
//  - every other token becomes a token leaf.
STATIC void push_result_token(parser_t *parser) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t pn;

    switch (lex->tok_kind) {
        case MP_TOKEN_NAME: {
            qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
            #if MICROPY_COMP_CONST
            // lookup identifier in table of dynamic constants
            mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP);
            if (elem != NULL) {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(elem->value));
                break;
            }
            #endif
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
            break;
        }

        case MP_TOKEN_INTEGER: {
            mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
            if (MP_OBJ_IS_SMALL_INT(o)) {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(o));
            } else {
                pn = make_node_const_object(parser, lex->tok_line, o);
            }
            break;
        }

        case MP_TOKEN_FLOAT_OR_IMAG: {
            mp_obj_t o = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
            pn = make_node_const_object(parser, lex->tok_line, o);
            break;
        }

        case MP_TOKEN_STRING:
        case MP_TOKEN_BYTES: {
            // Don't automatically intern all strings/bytes.  doc strings (which are usually large)
            // will be discarded by the compiler, and so we shouldn't intern them.
            bool is_str = lex->tok_kind == MP_TOKEN_STRING;
            qstr qst;
            if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
                // intern short strings
                qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
            } else {
                // check if this string is already interned
                qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
            }
            if (qst == MP_QSTR_NULL) {
                // not interned, make a node holding a pointer to the string/bytes data
                pn = make_node_string_bytes(parser, lex->tok_line, is_str ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
            } else {
                // qstr exists, make a leaf node
                pn = mp_parse_node_new_leaf(is_str ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
            }
            break;
        }

        default:
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
            break;
    }

    push_result_node(parser, pn);
}

432
#if MICROPY_COMP_MODULE_CONST
// Table mapping a module name to the object whose integer attributes may be
// folded into constants at parse time.
STATIC const mp_rom_map_elem_t mp_constants_table[] = {
    #if MICROPY_PY_UCTYPES
    { MP_ROM_QSTR(MP_QSTR_uctypes), MP_ROM_PTR(&mp_module_uctypes) },
    #endif
    // Extra constants as defined by a port
    MICROPY_PORT_CONSTANTS
};
// read-only map over the table above
STATIC MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table);
#endif

#if MICROPY_COMP_CONST_FOLDING
// Try to evaluate, at parse time, the rule that has just been matched and
// whose num_args arguments are on top of the result stack.
//
// Returns true if the rule was folded: its arguments have then been popped
// and replaced by a single small-int leaf holding the result.  Returns false
// if the rule must be kept; the result stack is then unchanged, except that
// for `id = const(value)` (MICROPY_COMP_CONST) the `const(value)` node on top
// is replaced by `value` and the constant is recorded in parser->consts.
// Sets PARSE_ERROR_CONST if the argument of const() is not a small int.
//
// Folding is abandoned (false is returned) whenever the result would not fit
// in a small int, or the operation would be an error or is not an integer
// operation (true division, division/modulo by zero, negative shift count),
// so that such cases are handled by the runtime in the usual way.
STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args) {
    // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
    // it does not do partial folding, eg 1 + 2 + x -> 3 + x

    mp_int_t arg0;
    if (rule->rule_id == RULE_expr
        || rule->rule_id == RULE_xor_expr
        || rule->rule_id == RULE_and_expr) {
        // folding for binary ops: | ^ &
        // (the results hold only the operands, the first one deepest in the stack)
        mp_parse_node_t pn = peek_result(parser, num_args - 1);
        if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
            return false;
        }
        arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        for (ssize_t i = num_args - 2; i >= 0; --i) {
            pn = peek_result(parser, i);
            if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
                return false;
            }
            mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
            if (rule->rule_id == RULE_expr) {
                // int | int
                arg0 |= arg1;
            } else if (rule->rule_id == RULE_xor_expr) {
                // int ^ int
                arg0 ^= arg1;
            } else if (rule->rule_id == RULE_and_expr) {
                // int & int
                arg0 &= arg1;
            }
        }
    } else if (rule->rule_id == RULE_shift_expr
        || rule->rule_id == RULE_arith_expr
        || rule->rule_id == RULE_term) {
        // folding for binary ops: << >> + - * / % //
        // (the results alternate operand, operator token, operand, ...)
        mp_parse_node_t pn = peek_result(parser, num_args - 1);
        if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
            return false;
        }
        arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        for (ssize_t i = num_args - 2; i >= 1; i -= 2) {
            pn = peek_result(parser, i - 1);
            if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
                return false;
            }
            mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
            mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
            if (tok == MP_TOKEN_OP_DBL_LESS) {
                // int << int
                // A negative shift count is an error in Python and undefined
                // behaviour in C, so it must be rejected before arg1 is used
                // as a shift amount in the range checks below.
                if (arg1 < 0
                    || arg1 >= (mp_int_t)BITS_PER_WORD
                    || arg0 > (MP_SMALL_INT_MAX >> arg1)
                    || arg0 < (MP_SMALL_INT_MIN >> arg1)) {
                    return false;
                }
                arg0 <<= arg1;
            } else if (tok == MP_TOKEN_OP_DBL_MORE) {
                // int >> int
                if (arg1 < 0) {
                    // negative shift count: don't fold, leave it to the runtime
                    return false;
                }
                if (arg1 >= (mp_int_t)BITS_PER_WORD) {
                    // Shifting to big amounts is undefined behavior
                    // in C and is CPU-dependent; propagate sign bit.
                    arg1 = BITS_PER_WORD - 1;
                }
                arg0 >>= arg1;
            } else if (tok == MP_TOKEN_OP_PLUS) {
                // int + int
                arg0 += arg1;
            } else if (tok == MP_TOKEN_OP_MINUS) {
                // int - int
                arg0 -= arg1;
            } else if (tok == MP_TOKEN_OP_STAR) {
                // int * int
                if (mp_small_int_mul_overflow(arg0, arg1)) {
                    return false;
                }
                arg0 *= arg1;
            } else if (tok == MP_TOKEN_OP_SLASH) {
                // int / int
                return false;
            } else if (tok == MP_TOKEN_OP_PERCENT) {
                // int % int
                if (arg1 == 0) {
                    return false;
                }
                arg0 = mp_small_int_modulo(arg0, arg1);
            } else {
                assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be
                // int // int
                if (arg1 == 0) {
                    return false;
                }
                arg0 = mp_small_int_floor_divide(arg0, arg1);
            }
            // the intermediate result must itself be a small int
            if (!MP_SMALL_INT_FITS(arg0)) {
                return false;
            }
        }
    } else if (rule->rule_id == RULE_factor_2) {
        // folding for unary ops: + - ~
        // (operand is on top of the stack, operator token just below it)
        mp_parse_node_t pn = peek_result(parser, 0);
        if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
            return false;
        }
        arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
        if (tok == MP_TOKEN_OP_PLUS) {
            // +int
        } else if (tok == MP_TOKEN_OP_MINUS) {
            // -int
            arg0 = -arg0;
            if (!MP_SMALL_INT_FITS(arg0)) {
                return false;
            }
        } else {
            assert(tok == MP_TOKEN_OP_TILDE); // should be
            // ~int
            arg0 = ~arg0;
        }

    #if MICROPY_COMP_CONST
    } else if (rule->rule_id == RULE_expr_stmt) {
        mp_parse_node_t pn1 = peek_result(parser, 0);
        if (!MP_PARSE_NODE_IS_NULL(pn1)
            && !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign)
            || MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) {
            // this node is of the form <x> = <y>
            mp_parse_node_t pn0 = peek_result(parser, 1);
            if (MP_PARSE_NODE_IS_ID(pn0)
                && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_power)
                && MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0])
                && MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const
                && MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren)
                && MP_PARSE_NODE_IS_NULL(((mp_parse_node_struct_t*)pn1)->nodes[2])
                ) {
                // code to assign dynamic constants: id = const(value)

                // get the id
                qstr id = MP_PARSE_NODE_LEAF_ARG(pn0);

                // get the value
                mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
                if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) {
                    parser->parse_error = PARSE_ERROR_CONST;
                    return false;
                }
                mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value);

                // store the value in the table of dynamic constants
                mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
                assert(elem->value == MP_OBJ_NULL);
                elem->value = MP_OBJ_NEW_SMALL_INT(value);

                // replace const(value) with value
                pop_result(parser);
                push_result_node(parser, pn_value);

                // finished folding this assignment, but we still want it to be part of the tree
                return false;
            }
        }
        return false;
    #endif

    #if MICROPY_COMP_MODULE_CONST
    } else if (rule->rule_id == RULE_power) {
        mp_parse_node_t pn0 = peek_result(parser, 2);
        mp_parse_node_t pn1 = peek_result(parser, 1);
        mp_parse_node_t pn2 = peek_result(parser, 0);
        if (!(MP_PARSE_NODE_IS_ID(pn0)
            && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period)
            && MP_PARSE_NODE_IS_NULL(pn2))) {
            return false;
        }
        // id1.id2
        // look it up in constant table, see if it can be replaced with an integer
        mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1;
        assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0]));
        qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0);
        qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]);
        mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP);
        if (elem == NULL) {
            return false;
        }
        mp_obj_t dest[2];
        mp_load_method_maybe(elem->value, q_attr, dest);
        // only a plain small-int attribute (not a method) can be folded
        if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
            return false;
        }
        arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]);
    #endif

    } else {
        return false;
    }

    // success folding this rule: replace all its arguments by the result

    for (size_t i = num_args; i > 0; i--) {
        pop_result(parser);
    }
    push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0));

    return true;
}
#endif

649
// Build a parse-node struct for `rule` out of the top `num_args` entries of
// the result stack and push it back onto the result stack.
//
//  - parser:   parser state owning the result stack
//  - src_line: source line recorded in the new node
//  - rule:     grammar rule the node represents
//  - num_args: number of result-stack entries that become child nodes
//
// No node is built in two cases: a parenthesised single expression (the
// inner expression is simply left on the stack), and, when constant folding
// is compiled in, a rule that fold_constants() has already replaced.
// If allocation fails, parser->parse_error is set to PARSE_ERROR_MEMORY and
// the result stack is left untouched.
STATIC void push_result_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t num_args) {
    // optimise away parenthesis around an expression if possible
    if (rule->rule_id == RULE_atom_paren) {
        // there should be just 1 arg for this rule
        mp_parse_node_t pn = peek_result(parser, 0);
        if (MP_PARSE_NODE_IS_NULL(pn)) {
            // need to keep parenthesis for ()
        } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_testlist_comp)) {
            // need to keep parenthesis for (a, b, ...)
        } else {
            // parenthesis around a single expression, so it's just the expression
            return;
        }
    }

    #if MICROPY_COMP_CONST_FOLDING
    if (fold_constants(parser, rule, num_args)) {
        // we folded this rule so return straight away
        return;
    }
    #endif

    // the node header is followed directly by its num_args child slots
    mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
    if (pn == NULL) {
        parser->parse_error = PARSE_ERROR_MEMORY;
        return;
    }
    pn->source_line = src_line;
    // low 8 bits hold the rule id, the bits above hold the child count
    pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
    // results come off the stack last-first, so fill the children back to front
    for (size_t i = num_args; i > 0; i--) {
        pn->nodes[i - 1] = pop_result(parser);
    }
    push_result_node(parser, (mp_parse_node_t)pn);
}

684
// Parse the token stream produced by `lex` and return the resulting parse tree.
//
//  - lex:        lexer supplying the tokens; it is always freed by this
//                function, on both the success and the error path
//  - input_kind: selects the top-level grammar rule: MP_PARSE_SINGLE_INPUT
//                uses single_input, MP_PARSE_EVAL_INPUT uses eval_input and
//                anything else uses file_input
//
// On success the returned tree has its root set to the single node left on
// the result stack and its chunk list set to the memory chunks allocated
// while parsing.
//
// On failure this function does not return: it raises (via nlr_raise) a
// MemoryError, a SyntaxError or an IndentationError, with a traceback entry
// giving the lexer's source name and current token line.
//
// The parser is driven by an explicit stack of (rule, argument index) pairs.
// `backtrack` is set when a rule fails to match and tells the next rule
// popped from the stack that its child failed.
mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {

    // initialise parser and allocate memory for its stacks

    parser_t parser;

    parser.parse_error = PARSE_ERROR_NONE;

    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);

    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);

    parser.lexer = lex;

    parser.tree.chunk = NULL;
    parser.cur_chunk = NULL;

    #if MICROPY_COMP_CONST
    mp_map_init(&parser.consts, 0);
    #endif

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        // skips the whole parse loop and lands in the error reporting below
        goto memory_error;
    }

    // work out the top-level rule to use, and push it on the stack
    size_t top_level_rule;
    switch (input_kind) {
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
        default: top_level_rule = RULE_file_input;
    }
    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);

    // parse!

    size_t n, i; // state for the current rule
    size_t rule_src_line; // source line for the first token matched by the current rule
    bool backtrack = false;
    const rule_t *rule = NULL;

    for (;;) {
        next_rule:
        // finished when there are no more rules to process, or an error was flagged
        if (parser.rule_stack_top == 0 || parser.parse_error) {
            break;
        }

        pop_rule(&parser, &rule, &i, &rule_src_line);
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    // a previously pushed alternative matched, so this or-rule is done
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                for (; i < n; ++i) {
                    uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK;
                    if (kind == RULE_ARG_TOK) {
                        if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                            push_result_token(&parser);
                            mp_lexer_to_next(lex);
                            goto next_rule;
                        }
                    } else {
                        assert(kind == RULE_ARG_RULE);
                        if (i + 1 < n) {
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                        }
                        push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
                        goto next_rule;
                    }
                }
                // none of the alternatives matched
                backtrack = true;
                break;

            case RULE_ACT_AND: {

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK: {
                            // need to match a token
                            mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
                            if (lex->tok_kind == tok_kind) {
                                // matched token
                                if (tok_kind == MP_TOKEN_NAME) {
                                    push_result_token(&parser);
                                }
                                mp_lexer_to_next(lex);
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
                        }
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
                        rule_and_no_other_choice:
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
                            goto next_rule;
                        default:
                            assert(0);
                            goto rule_and_no_other_choice; // to help flow control analysis
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
                bool emit_rule = false;
                for (size_t x = 0; x < n; ++x) {
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
                        if (tok_kind >= MP_TOKEN_NAME) {
                            emit_rule = true;
                        }
                        if (tok_kind == MP_TOKEN_NAME) {
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

                #if !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
                        pop_result(&parser); // MP_PARSE_NODE_NULL
                        mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
                        if (MP_PARSE_NODE_IS_STRUCT(pn)) {
                            mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
                            if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
                                // nodes[0] is the character buffer and nodes[1] its length
                                m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]);
                            }
                        }
                        // the discarded statement is replaced by a pass statement
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
                        break;
                    }
                }
                #endif

                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
                // rule should not be emitted if it has only 1 argument
                if (rule->act & RULE_ACT_ALLOW_IDENT) {
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
                if (ADD_BLANK_NODE(rule)) {
                    emit_rule = true;
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
                    i += 1;
                }

                size_t num_not_nil = 0;
                for (size_t x = 0; x < i; ++x) {
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
                        num_not_nil += 1;
                    }
                }
                if (emit_rule || num_not_nil != 1) {
                    // need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
                    push_result_rule(&parser, rule_src_line, rule, i);
                } else {
                    // single result, leave it on stack
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
                    for (size_t x = 0; x < i; ++x) {
                        mp_parse_node_t pn2 = pop_result(&parser);
                        if (pn2 != MP_PARSE_NODE_NULL) {
                            pn = pn2;
                        }
                    }
                    push_result_node(&parser, pn);
                }
                break;
            }

            case RULE_ACT_LIST: {
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
                bool had_trailing_sep;
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
                        // (i & 1 & n) is 1 only for odd i when n is odd, which
                        // selects arg[1] (the separator); otherwise arg[0] (the item)
                        size_t arg = rule->arg[i & 1 & n];
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
                                        push_result_token(&parser);
                                    }
                                    mp_lexer_to_next(lex);
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
                            rule_list_no_other_choice:
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
                                goto next_rule;
                            default:
                                assert(0);
                                goto rule_list_no_other_choice; // to help flow control analysis
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
                        push_result_rule(&parser, rule_src_line, rule, i);
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    push_result_rule(&parser, rule_src_line, rule, i);
                }
                break;
            }

            default:
                assert(0);
        }
    }

    // NOTE(review): the "goto syntax_error" and "goto memory_error" jumps land
    // further down and therefore skip the map deinit and the chunk linking
    // below -- confirm that memory is reclaimed some other way on those paths.

    #if MICROPY_COMP_CONST
    mp_map_deinit(&parser.consts);
    #endif

    // truncate final chunk and link into chain of chunks
    if (parser.cur_chunk != NULL) {
        // NOTE(review): the pointer returned by m_renew is discarded, so this
        // relies on the shrink happening in place -- confirm the allocator
        // never moves a block when it is made smaller.
        (void)m_renew(byte, parser.cur_chunk,
            sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
            sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used);
        parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
        parser.cur_chunk->union_.next = parser.tree.chunk;
        parser.tree.chunk = parser.cur_chunk;
    }

    // exception to raise, or MP_OBJ_NULL if parsing succeeded
    mp_obj_t exc;

    if (parser.parse_error) {
        #if MICROPY_COMP_CONST
        if (parser.parse_error == PARSE_ERROR_CONST) {
            exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
                "constant must be an integer");
        } else
        #endif
        {
            assert(parser.parse_error == PARSE_ERROR_MEMORY);
        memory_error:
            exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
                "parser could not allocate enough memory");
        }
        parser.tree.root = MP_PARSE_NODE_NULL;
    } else if (
        lex->tok_kind != MP_TOKEN_END // check we are at the end of the token stream
        || parser.result_stack_top == 0 // check that we got a node (can fail on empty input)
        ) {
    syntax_error:
        if (lex->tok_kind == MP_TOKEN_INDENT) {
            exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
                "unexpected indent");
        } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
            exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
                "unindent does not match any outer indentation level");
        } else {
            exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
                "invalid syntax");
        }
        parser.tree.root = MP_PARSE_NODE_NULL;
    } else {
        // no errors

        //result_stack_show(parser);
        //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
        //printf("result stack alloc: %d\n", parser.result_stack_alloc);
        //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);

        // get the root parse node that we created
        assert(parser.result_stack_top == 1);
        exc = MP_OBJ_NULL;
        parser.tree.root = parser.result_stack[0];
    }

    // free the memory that we don't need anymore
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
    // we also free the lexer on behalf of the caller (see below)

    if (exc != MP_OBJ_NULL) {
        // had an error so raise the exception
        // add traceback to give info about file name and location
        // we don't have a 'block' name, so just pass the NULL qstr to indicate this
        mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
        mp_lexer_free(lex);
        nlr_raise(exc);
    } else {
        mp_lexer_free(lex);
        return parser.tree;
    }
}
1087
1088
1089
1090
1091
1092
1093
1094
1095

// Free every memory chunk in the tree's chunk list.
//
// The list is walked through each chunk's union_.next link, and each chunk is
// released with the size of its header plus its recorded `alloc` bytes.
// Afterwards tree->chunk is reset to NULL so the tree no longer points at
// freed memory and a repeated call is a harmless no-op.
void mp_parse_tree_clear(mp_parse_tree_t *tree) {
    mp_parse_chunk_t *chunk = tree->chunk;
    while (chunk != NULL) {
        // read the link before the chunk that holds it is freed
        mp_parse_chunk_t *next = chunk->union_.next;
        m_del(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc);
        chunk = next;
    }
    tree->chunk = NULL;
}
1096
1097

#endif // MICROPY_ENABLE_COMPILER