parse.c 31.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <string.h>
Damien's avatar
Damien committed
32

33
#include "py/nlr.h"
34
35
36
37
#include "py/lexer.h"
#include "py/parse.h"
#include "py/parsenum.h"
#include "py/smallint.h"
Damien's avatar
Damien committed
38
39

// Encoding of rule_t.act: the low nibble carries the argument count (or the
// list variant), bits 4-5 select the action kind and bits 6-7 are flags.
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_KIND_MASK      (0x30)
#define RULE_ACT_ALLOW_IDENT    (0x40)
#define RULE_ACT_ADD_BLANK      (0x80)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Encoding of each rule_t.arg[] entry: the top nibble is the argument kind,
// the low 12 bits hold a token kind or a rule id.
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

// True if the rule (given as a pointer to rule_t) has the RULE_ACT_ADD_BLANK
// flag set.  The argument is parenthesised so that any pointer expression
// (eg &r or p + 1) can be passed, not only a plain identifier or subscript.
#define ADD_BLANK_NODE(rule) (((rule)->act & RULE_ACT_ADD_BLANK) != 0)
55

Damien's avatar
Damien committed
56
57
58
59
60
61
62
63
64
65
66
67
68
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of a single grammar rule.  Instances are generated from
// the DEF_RULE entries of py/grammar.h using positional initialisers, so the
// member order here must match those initialisers.
typedef struct _rule_t {
    byte rule_id; // RULE_xxx identifier of this rule
    byte act; // action kind, flags and argument count; decode with the RULE_ACT_* masks
#ifdef USE_RULE_NAME
    const char *rule_name; // stringified rule name, present only when USE_RULE_NAME is defined
#endif
    uint16_t arg[]; // rule arguments: a RULE_ARG_* kind OR-ed with a token kind or rule id
} rule_t;

// Rule identifiers: one RULE_xxx constant per DEF_RULE entry of the grammar,
// followed by ids for the parse-node kinds that have no grammar rule.
enum {
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
#include "py/grammar.h"
#undef DEF_RULE
    // number of grammar rules; the ids that follow are not grammar rules
    RULE_maximum_number_of,
    // special node for non-interned string
    RULE_string,
    // special node for non-interned bytes
    RULE_bytes,
    // special node for a constant, generic Python object
    RULE_const_object,
};

78
79
// Shorthand used by the DEF_RULE entries of py/grammar.h.
// Flags (note: these two are not #undef'd below):
#define ident                   (RULE_ACT_ALLOW_IDENT)
#define blank                   (RULE_ACT_ADD_BLANK)
// Action kinds, combined with the argument count or the list variant:
#define and(n)                  (RULE_ACT_AND | n)
#define or(n)                   (RULE_ACT_OR | n)
#define list                    (RULE_ACT_LIST | 1)
#define one_or_more             (RULE_ACT_LIST | 2)
#define list_with_end           (RULE_ACT_LIST | 3)
// Rule arguments: a kind tag combined with a token kind or a rule id:
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define opt_tok(t)              (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)

// Instantiate one static, constant rule_t per grammar rule.
#ifndef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#endif
#include "py/grammar.h"
#undef DEF_RULE

// The shorthand is only needed while the grammar is being expanded.
#undef one_or_more
#undef opt_rule
#undef opt_tok
#undef rule
#undef tok
#undef list_with_end
#undef list
#undef and
#undef or

106
// Table mapping a rule id (RULE_xxx, below RULE_maximum_number_of) to its
// static rule description, in the order the rules appear in py/grammar.h.
// The table itself is const as well as the rules it points to: it is never
// modified, and this lets it be placed in read-only memory.
STATIC const rule_t *const rules[] = {
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
#include "py/grammar.h"
#undef DEF_RULE
};

typedef struct _rule_stack_t {
113
114
115
    mp_uint_t src_line : BITS_PER_WORD - 8; // maximum bits storing source line number
    mp_uint_t rule_id : 8; // this must be large enough to fit largest rule number
    mp_uint_t arg_i; // this dictates the maximum nodes in a "list" of things
Damien's avatar
Damien committed
116
117
118
} rule_stack_t;

typedef struct _parser_t {
119
120
    bool had_memory_error;

121
122
    mp_uint_t rule_stack_alloc;
    mp_uint_t rule_stack_top;
Damien's avatar
Damien committed
123
124
    rule_stack_t *rule_stack;

125
126
    mp_uint_t result_stack_alloc;
    mp_uint_t result_stack_top;
127
    mp_parse_node_t *result_stack;
128
129

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
130
131
} parser_t;

132
133
134
135
// Record that an allocation failed during parsing.  The stack helpers in this
// file check this flag and return early once it is set.
STATIC inline void memory_error(parser_t *parser) {
    parser->had_memory_error = true;
}

136
// Save a rule, together with its source line and argument index, on top of
// the rule stack, growing the stack by a fixed increment when it is full.
// Does nothing once a memory error has been recorded, and records one itself
// if the stack cannot be grown.
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
    if (parser->had_memory_error) {
        return;
    }

    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        // no free slot left, so extend the stack
        mp_uint_t new_alloc = parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC;
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, new_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack_alloc = new_alloc;
        parser->rule_stack = grown;
    }

    // claim the next free slot and fill it in
    rule_stack_t *entry = parser->rule_stack + parser->rule_stack_top;
    parser->rule_stack_top += 1;
    entry->arg_i = arg_i;
    entry->rule_id = rule->rule_id;
    entry->src_line = src_line;
}

155
// Push the rule referenced by a rule argument, starting at its first argument
// and tagged with the line of the lexer's current token.  The argument must be
// of kind RULE_ARG_RULE or RULE_ARG_OPT_RULE.
STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);

    // the low bits of the argument hold the id of the child rule
    mp_uint_t child_id = arg & RULE_ARG_ARG_MASK;
    assert(child_id < RULE_maximum_number_of);

    const rule_t *child = rules[child_id];
    push_rule(parser, parser->lexer->tok_line, child, 0);
}

162
// Remove the top entry of the rule stack and hand back, through the output
// pointers, the rule it refers to plus the saved argument index and source
// line.  Must not be called after a memory error has been recorded.
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
    assert(!parser->had_memory_error);

    parser->rule_stack_top -= 1;
    const rule_stack_t *top = &parser->rule_stack[parser->rule_stack_top];

    *rule = rules[top->rule_id];
    *arg_i = top->arg_i;
    *src_line = top->src_line;
}

170
// Build a leaf parse node by packing `arg` above the `kind` tag bits.  A small
// int is shifted up by 1 bit, all other leaf kinds by 4 bits.
//
// The shift is done on unsigned values: `arg` may be negative, and
// left-shifting a negative signed integer is undefined behaviour in C.  The
// resulting bit pattern is the same as a two's complement signed shift.
mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) {
    mp_uint_t shift = (kind == MP_PARSE_NODE_SMALL_INT) ? 1 : 4;
    return (mp_parse_node_t)((mp_uint_t)kind | ((mp_uint_t)arg << shift));
}

177
// Recursively free a parse node.  Only struct nodes own memory; any other
// node is ignored.
void mp_parse_node_free(mp_parse_node_t pn) {
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        return;
    }

    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
    mp_uint_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
    mp_uint_t kind = MP_PARSE_NODE_STRUCT_KIND(pns);

    if (kind == RULE_string || kind == RULE_bytes) {
        // nodes[0] is the data pointer and nodes[1] its length
        m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
    } else if (kind != RULE_const_object) {
        // (a const object is not freed since it's probably used by the compiled code)
        // free the children, leaving out the trailing blank node if the rule has one
        mp_uint_t num_children = num_nodes;
        if (ADD_BLANK_NODE(rules[kind])) {
            num_children -= 1;
        }
        for (mp_uint_t child = 0; child < num_children; ++child) {
            mp_parse_node_free(pns->nodes[child]);
        }
    }

    // finally release the struct itself, with all of its node slots
    m_del_var(mp_parse_node_struct_t, mp_parse_node_t, num_nodes, pns);
}

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// View *pn as a list of nodes and return the number of items in it, with
// *nodes set to point at the first item:
//  - a null node is an empty list (*nodes is NULL);
//  - a leaf node, or a struct node whose kind is not pn_kind, is a list with
//    the single item *pn;
//  - a struct node of kind pn_kind is the list of its child nodes.
int mp_parse_node_extract_list(mp_parse_node_t *pn, mp_uint_t pn_kind, mp_parse_node_t **nodes) {
    if (MP_PARSE_NODE_IS_NULL(*pn)) {
        *nodes = NULL;
        return 0;
    }

    if (!MP_PARSE_NODE_IS_LEAF(*pn)) {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn);
        if (MP_PARSE_NODE_STRUCT_KIND(pns) == pn_kind) {
            *nodes = pns->nodes;
            return MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        }
    }

    // a leaf, or a struct of some other kind: the node is its own single item
    *nodes = pn;
    return 1;
}

221
#if MICROPY_DEBUG_PRINTERS
// Debugging aid: print a parse node and, recursively, its children to stdout.
// Each line starts with the source line of the node (struct nodes only),
// followed by `indent` spaces; children are indented 2 further spaces.
void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent) {
    // source line column, blank for nodes that don't carry a line number
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }
    for (mp_uint_t pad = indent; pad > 0; pad--) {
        printf(" ");
    }

    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
        return;
    }

    if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", value);
        return;
    }

    if (MP_PARSE_NODE_IS_LEAF(pn)) {
        mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID:
                printf("id(%s)\n", qstr_str(arg));
                break;
            case MP_PARSE_NODE_STRING:
                printf("str(%s)\n", qstr_str(arg));
                break;
            case MP_PARSE_NODE_BYTES:
                printf("bytes(%s)\n", qstr_str(arg));
                break;
            case MP_PARSE_NODE_TOKEN:
                printf("tok(" INT_FMT ")\n", arg);
                break;
            default:
                assert(0);
        }
        return;
    }

    // anything else must be a mp_parse_node_struct_t
    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
    mp_uint_t kind = MP_PARSE_NODE_STRUCT_KIND(pns);
    switch (kind) {
        case RULE_string:
            printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
            break;
        case RULE_bytes:
            printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
            break;
        case RULE_const_object:
            printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
            break;
        default: {
            // a node made from a grammar rule: print a header, then the children
            mp_uint_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
            printf("%s(" UINT_FMT ") (n=" UINT_FMT ")\n", rules[kind]->rule_name, kind, num_nodes);
#else
            printf("rule(" UINT_FMT ") (n=" UINT_FMT ")\n", kind, num_nodes);
#endif
            for (mp_uint_t child = 0; child < num_nodes; ++child) {
                mp_parse_node_print(pns->nodes[child], indent + 2);
            }
            break;
        }
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
268
269

/*
270
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
271
    printf("result stack, most recent first\n");
272
    for (mp_int_t i = parser->result_stack_top - 1; i >= 0; i--) {
273
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
274
275
276
277
    }
}
*/

278
// Remove and return the most recently pushed result node.  After a memory
// error the stack is left untouched and MP_PARSE_NODE_NULL is returned.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    if (!parser->had_memory_error) {
        assert(parser->result_stack_top > 0);
        parser->result_stack_top -= 1;
        return parser->result_stack[parser->result_stack_top];
    }
    return MP_PARSE_NODE_NULL;
}

286
// Return, without removing it, the result node `pos` entries below the top of
// the result stack (pos 0 is the most recent one).  After a memory error
// MP_PARSE_NODE_NULL is returned instead.
STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
    if (!parser->had_memory_error) {
        assert(pos < parser->result_stack_top);
        mp_uint_t idx = parser->result_stack_top - pos - 1;
        return parser->result_stack[idx];
    }
    return MP_PARSE_NODE_NULL;
}

294
// Push a parse node on the result stack, growing the stack by a fixed
// increment when it is full.  Does nothing once a memory error has been
// recorded, and records one itself if the stack cannot be grown.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }

    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // no free slot left, so extend the stack
        mp_uint_t new_alloc = parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC;
        mp_parse_node_t *grown = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, new_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack_alloc = new_alloc;
        parser->result_stack = grown;
    }

    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}

310
// Create a struct node of kind `rule_kind` (RULE_string or RULE_bytes) that
// holds a private copy of the `len` bytes at `str`: nodes[0] is the pointer to
// the copy and nodes[1] its length.
//
// Returns MP_PARSE_NODE_NULL and records a memory error on the parser if
// either allocation fails.  The data buffer is allocated with the non-raising
// allocator, like every other allocation made by the parser, so that failure
// goes through memory_error() and the node allocated first is released
// instead of being abandoned.
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) {
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (pn == NULL) {
        memory_error(parser);
        return MP_PARSE_NODE_NULL;
    }

    char *p = m_new_maybe(char, len);
    if (p == NULL && len != 0) {
        // could not allocate the copy, so give the node back and report it
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
        memory_error(parser);
        return MP_PARSE_NODE_NULL;
    }
    if (len != 0) {
        // (a zero-length allocation may yield NULL, which must not be passed to memcpy)
        memcpy(p, str, len);
    }

    pn->source_line = src_line;
    pn->kind_num_nodes = rule_kind | (2 << 8);
    pn->nodes[0] = (mp_int_t)p;
    pn->nodes[1] = len;
    return (mp_parse_node_t)pn;
}

// Create a RULE_const_object struct node whose single slot, nodes[0], holds
// the given object.  Returns MP_PARSE_NODE_NULL and records a memory error on
// the parser if the node cannot be allocated.
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 1);
    if (node != NULL) {
        node->source_line = src_line;
        node->kind_num_nodes = RULE_const_object | (1 << 8);
        node->nodes[0] = (mp_uint_t)obj;
        return (mp_parse_node_t)node;
    }
    memory_error(parser);
    return MP_PARSE_NODE_NULL;
}
336

337
// Turn the lexer's current token into a parse node and push it on the result
// stack.  The token itself is not consumed here.
STATIC void push_result_token(parser_t *parser) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t pn;

    switch (lex->tok_kind) {
        case MP_TOKEN_NAME:
            // identifiers are always interned
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
            break;

        case MP_TOKEN_INTEGER: {
            // small ints are stored in the leaf itself, anything else as a constant object
            mp_obj_t num = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
            if (!MP_OBJ_IS_SMALL_INT(num)) {
                pn = make_node_const_object(parser, lex->tok_line, num);
            } else {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(num));
            }
            break;
        }

        case MP_TOKEN_FLOAT_OR_IMAG: {
            mp_obj_t num = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
            pn = make_node_const_object(parser, lex->tok_line, num);
            break;
        }

        case MP_TOKEN_STRING:
        case MP_TOKEN_BYTES: {
            // Don't automatically intern all strings/bytes.  doc strings (which are usually large)
            // will be discarded by the compiler, and so we shouldn't intern them.
            bool is_str = lex->tok_kind == MP_TOKEN_STRING;
            qstr qst;
            if (lex->vstr.len > MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
                // long string: only look up whether it is already interned
                qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
            } else {
                // short string: intern it
                qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
            }
            if (qst == MP_QSTR_NULL) {
                // not interned, make a node holding a pointer to the string/bytes data
                pn = make_node_string_bytes(parser, lex->tok_line, is_str ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
            } else {
                // qstr exists, make a leaf node
                pn = mp_parse_node_new_leaf(is_str ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
            }
            break;
        }

        default:
            // every other token is represented by its kind alone
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
            break;
    }

    push_result_node(parser, pn);
}

376
// Replace the top `num_args` entries of the result stack by one struct node
// for `rule` that has those entries as its children, in the order they were
// pushed.  Records a memory error and leaves the stack untouched if the node
// cannot be allocated.
STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
    if (node == NULL) {
        memory_error(parser);
        return;
    }

    node->source_line = src_line;
    // the rule id goes in the low 8 bits, the number of children above it
    node->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);

    // results come off the stack most recent first, so fill the slots backwards
    mp_uint_t slot = num_args;
    while (slot > 0) {
        slot -= 1;
        node->nodes[slot] = pop_result(parser);
    }

    push_result_node(parser, (mp_parse_node_t)node);
}

390
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
391

392
    // initialise parser and allocate memory for its stacks
393

394
    parser_t parser;
395

396
    parser.had_memory_error = false;
397

398
    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
399
400
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien's avatar
Damien committed
401

402
    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
403
404
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien's avatar
Damien committed
405

406
407
408
409
410
411
    parser.lexer = lex;

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        goto memory_error;
    }
412

413
    // work out the top-level rule to use, and push it on the stack
414
    mp_uint_t top_level_rule;
Damien's avatar
Damien committed
415
    switch (input_kind) {
416
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien George's avatar
Damien George committed
417
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien's avatar
Damien committed
418
419
        default: top_level_rule = RULE_file_input;
    }
420
    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien's avatar
Damien committed
421

422
423
    // parse!

424
425
    mp_uint_t n, i; // state for the current rule
    mp_uint_t rule_src_line; // source line for the first token matched by the current rule
Damien's avatar
Damien committed
426
    bool backtrack = false;
427
    const rule_t *rule = NULL;
Damien's avatar
Damien committed
428
429
430

    for (;;) {
        next_rule:
431
        if (parser.rule_stack_top == 0 || parser.had_memory_error) {
Damien's avatar
Damien committed
432
433
434
            break;
        }

435
        pop_rule(&parser, &rule, &i, &rule_src_line);
Damien's avatar
Damien committed
436
437
438
439
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
440
441
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien's avatar
Damien committed
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                for (; i < n - 1; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
457
458
                            if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                                push_result_token(&parser);
459
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
460
461
462
463
                                goto next_rule;
                            }
                            break;
                        case RULE_ARG_RULE:
464
                        rule_or_no_other_choice:
465
466
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien's avatar
Damien committed
467
468
469
                            goto next_rule;
                        default:
                            assert(0);
470
                            goto rule_or_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
471
472
473
                    }
                }
                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
474
475
                    if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                        push_result_token(&parser);
476
                        mp_lexer_to_next(lex);
Damien's avatar
Damien committed
477
478
479
480
481
                    } else {
                        backtrack = true;
                        goto next_rule;
                    }
                } else {
482
                    push_rule_from_arg(&parser, rule->arg[i]);
Damien's avatar
Damien committed
483
484
485
                }
                break;

486
            case RULE_ACT_AND: {
Damien's avatar
Damien committed
487
488
489
490
491
492

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
493
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
509
                        case RULE_ARG_TOK: {
Damien's avatar
Damien committed
510
                            // need to match a token
511
                            mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
512
                            if (lex->tok_kind == tok_kind) {
Damien's avatar
Damien committed
513
                                // matched token
514
                                if (tok_kind == MP_TOKEN_NAME) {
515
                                    push_result_token(&parser);
Damien's avatar
Damien committed
516
                                }
517
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
518
519
520
521
522
523
524
525
526
527
528
529
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
530
                        }
Damien's avatar
Damien committed
531
532
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
533
                        rule_and_no_other_choice:
534
535
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien's avatar
Damien committed
536
537
538
                            goto next_rule;
                        default:
                            assert(0);
539
                            goto rule_and_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
540
541
542
543
544
545
546
547
548
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
549
                bool emit_rule = false;
550
                for (mp_uint_t x = 0; x < n; ++x) {
Damien's avatar
Damien committed
551
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
552
                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
553
                        if (tok_kind >= MP_TOKEN_NAME) {
Damien's avatar
Damien committed
554
555
                            emit_rule = true;
                        }
556
                        if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
557
558
559
560
561
562
563
564
565
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

566
567
#if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
568
569
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
570
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
571
572
                        pop_result(&parser); // MP_PARSE_NODE_NULL
                        mp_parse_node_free(pop_result(&parser)); // RULE_string
573
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
574
575
576
577
578
                        break;
                    }
                }
#endif

Damien's avatar
Damien committed
579
580
581
582
583
                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

584
585
586
587
588
589
                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
                // rule should not be emitted if it has only 1 argument
                // NOTE: can't set this flag for atom_paren because we need it
                // to distinguish, for example, [a,b] from [(a,b)]
                // TODO possibly set for: varargslist_name, varargslist_equal
                if (rule->act & RULE_ACT_ALLOW_IDENT) {
Damien's avatar
Damien committed
590
591
592
593
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
594
                if (ADD_BLANK_NODE(rule)) {
Damien's avatar
Damien committed
595
                    emit_rule = true;
596
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
597
598
599
                    i += 1;
                }

600
601
                mp_uint_t num_not_nil = 0;
                for (mp_uint_t x = 0; x < i; ++x) {
602
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
603
604
605
606
607
                        num_not_nil += 1;
                    }
                }
                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
                if (emit_rule) {
608
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
609
                } else if (num_not_nil == 0) {
610
                    push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien's avatar
Damien committed
611
612
613
614
                    //result_stack_show(parser);
                    //assert(0);
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
615
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
616
                    for (mp_uint_t x = 0; x < i; ++x) {
617
                        mp_parse_node_t pn2 = pop_result(&parser);
618
                        if (pn2 != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
619
620
621
                            pn = pn2;
                        }
                    }
622
                    push_result_node(&parser, pn);
Damien's avatar
Damien committed
623
                } else {
624
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
625
626
                }
                break;
627
            }
Damien's avatar
Damien committed
628

629
            case RULE_ACT_LIST: {
Damien's avatar
Damien committed
630
631
632
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
633
                bool had_trailing_sep;
Damien's avatar
Damien committed
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow a trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
666
                        mp_uint_t arg = rule->arg[i & 1 & n];
Damien's avatar
Damien committed
667
668
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
669
                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
670
671
672
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
673
                                        push_result_token(&parser);
Damien's avatar
Damien committed
674
                                    }
675
                                    mp_lexer_to_next(lex);
Damien's avatar
Damien committed
676
677
678
679
680
681
682
683
684
685
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
686
                            rule_list_no_other_choice:
687
688
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
Damien's avatar
Damien committed
689
690
691
                                goto next_rule;
                            default:
                                assert(0);
692
                                goto rule_list_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
709
                        push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
710
711
712
713
714
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
715
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
716
717
                }
                break;
718
            }
Damien's avatar
Damien committed
719
720
721
722
723

            default:
                assert(0);
        }
    }
724

725
726
    mp_obj_t exc;
    mp_parse_node_t result;
727
728

    // check if we had a memory error
729
730
    if (parser.had_memory_error) {
memory_error:
731
732
        exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
            "parser could not allocate enough memory");
733
        result = MP_PARSE_NODE_NULL;
734
735
736
        goto finished;
    }

737
    // check we are at the end of the token stream
738
    if (lex->tok_kind != MP_TOKEN_END) {
739
        goto syntax_error;
Damien's avatar
Damien committed
740
    }
741

Damien's avatar
Damien committed
742
743
    //printf("--------------\n");
    //result_stack_show(parser);
744
745
    //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser.result_stack_alloc);
Damien's avatar
Damien committed
746
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
747
748

    // get the root parse node that we created
749
    assert(parser.result_stack_top == 1);
750
    exc = MP_OBJ_NULL;
751
    result = parser.result_stack[0];
752
753
754

finished:
    // free the memory that we don't need anymore
755
756
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
757
758
759
760
761
762
763
764
765
766
767
768
769
    // we also free the lexer on behalf of the caller (see below)

    if (exc != MP_OBJ_NULL) {
        // had an error so raise the exception
        // add traceback to give info about file name and location
        // we don't have a 'block' name, so just pass the NULL qstr to indicate this
        mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
        mp_lexer_free(lex);
        nlr_raise(exc);
    } else {
        mp_lexer_free(lex);
        return result;
    }
Damien's avatar
Damien committed
770
771

syntax_error:
772
    if (lex->tok_kind == MP_TOKEN_INDENT) {
773
774
        exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
            "unexpected indent");
775
    } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
776
777
        exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
            "unindent does not match any outer indentation level");
778
    } else {
779
780
        exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
            "invalid syntax");
Damien's avatar
Damien committed
781
#ifdef USE_RULE_NAME
782
        // debugging: print the rule name that failed and the token
783
784
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
785
        mp_token_show(lex);
786
#endif
787
#endif
788
    }
789
    result = MP_PARSE_NODE_NULL;
790
    goto finished;
Damien's avatar
Damien committed
791
}