parse.c 25.3 KB
Newer Older
Damien's avatar
Damien committed
1
2
3
4
5
6
7
8
9
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>

#include "misc.h"
10
#include "mpconfig.h"
11
#include "qstr.h"
Damien's avatar
Damien committed
12
#include "lexer.h"
13
#include "parsenumbase.h"
Damien's avatar
Damien committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#include "parse.h"

#define RULE_ACT_KIND_MASK      (0xf0)
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

#define RULE_ARG_BLANK          (0x0000)
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

30
31
#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond)

Damien's avatar
Damien committed
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

typedef struct _rule_t {
    byte rule_id;
    byte act;
#ifdef USE_RULE_NAME
    const char *rule_name;
#endif
    uint16_t arg[];
} rule_t;

enum {
    RULE_none = 0,
46
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
Damien's avatar
Damien committed
47
48
49
50
51
52
53
54
55
56
#include "grammar.h"
#undef DEF_RULE
    RULE_maximum_number_of,
};

#define or(n)                   (RULE_ACT_OR | n)
#define and(n)                  (RULE_ACT_AND | n)
#define one_or_more             (RULE_ACT_LIST | 2)
#define list                    (RULE_ACT_LIST | 1)
#define list_with_end           (RULE_ACT_LIST | 3)
57
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
Damien's avatar
Damien committed
58
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
59
#define opt_tok(t)              (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
Damien's avatar
Damien committed
60
61
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
#ifdef USE_RULE_NAME
62
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
Damien's avatar
Damien committed
63
#else
64
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
Damien's avatar
Damien committed
65
66
67
68
69
70
71
72
73
74
75
76
77
#endif
#include "grammar.h"
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_tok
#undef opt_rule
#undef one_or_more
#undef DEF_RULE

78
STATIC const rule_t *rules[] = {
Damien's avatar
Damien committed
79
    NULL,
80
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
Damien's avatar
Damien committed
81
82
83
84
85
#include "grammar.h"
#undef DEF_RULE
};

typedef struct _rule_stack_t {
86
87
    unsigned int src_line : 24;
    unsigned int rule_id : 8;
Damien's avatar
Damien committed
88
89
90
91
92
93
94
95
    int32_t arg_i; // what should be the size and signedness?
} rule_stack_t;

typedef struct _parser_t {
    uint rule_stack_alloc;
    uint rule_stack_top;
    rule_stack_t *rule_stack;

96
    uint result_stack_alloc;
Damien's avatar
Damien committed
97
    uint result_stack_top;
98
    mp_parse_node_t *result_stack;
99
100

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
101
102
} parser_t;

103
STATIC void push_rule(parser_t *parser, int src_line, const rule_t *rule, int arg_i) {
Damien's avatar
Damien committed
104
    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
105
        parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc * 2);
Damien's avatar
Damien committed
106
107
        parser->rule_stack_alloc *= 2;
    }
108
109
110
111
    rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
    rs->src_line = src_line;
    rs->rule_id = rule->rule_id;
    rs->arg_i = arg_i;
Damien's avatar
Damien committed
112
113
}

114
STATIC void push_rule_from_arg(parser_t *parser, uint arg) {
Damien's avatar
Damien committed
115
116
117
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
    uint rule_id = arg & RULE_ARG_ARG_MASK;
    assert(rule_id < RULE_maximum_number_of);
118
    push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0);
Damien's avatar
Damien committed
119
120
}

121
STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *src_line) {
Damien's avatar
Damien committed
122
123
124
    parser->rule_stack_top -= 1;
    *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
    *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
125
    *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
Damien's avatar
Damien committed
126
127
}

128
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
129
130
131
132
    if (kind == MP_PARSE_NODE_SMALL_INT) {
        return (mp_parse_node_t)(kind | (arg << 1));
    }
    return (mp_parse_node_t)(kind | (arg << 5));
Damien's avatar
Damien committed
133
134
}

135
136
//int num_parse_nodes_allocated = 0;
mp_parse_node_struct_t *parse_node_new_struct(int src_line, int rule_id, int num_args) {
137
    mp_parse_node_struct_t *pn = m_new_obj_var(mp_parse_node_struct_t, mp_parse_node_t, num_args);
138
    pn->source_line = src_line;
Damien's avatar
Damien committed
139
    pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8);
140
    //num_parse_nodes_allocated += 1;
Damien's avatar
Damien committed
141
142
143
    return pn;
}

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
uint mp_parse_node_free(mp_parse_node_t pn) {
    uint cnt = 0;
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
        bool adjust = ADD_BLANK_NODE(rule_id);
        if (adjust) {
            n--;
        }
        for (uint i = 0; i < n; i++) {
            cnt += mp_parse_node_free(pns->nodes[i]);
        }
        if (adjust) {
            n++;
159
        }
160
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
161
162
163
164
165
        cnt++;
    }
    return cnt;
}

166
167
#if MICROPY_DEBUG_PRINTERS
void mp_parse_node_print(mp_parse_node_t pn, int indent) {
168
169
170
171
172
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }
Damien's avatar
Damien committed
173
174
175
    for (int i = 0; i < indent; i++) {
        printf(" ");
    }
176
    if (MP_PARSE_NODE_IS_NULL(pn)) {
Damien's avatar
Damien committed
177
        printf("NULL\n");
178
179
180
    } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
181
    } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
182
        machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
183
184
185
186
187
188
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
189
            case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", arg); break;
Damien's avatar
Damien committed
190
191
192
            default: assert(0);
        }
    } else {
193
194
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
Damien's avatar
Damien committed
195
#ifdef USE_RULE_NAME
196
        printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
Damien's avatar
Damien committed
197
#else
198
        printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
Damien's avatar
Damien committed
199
#endif
200
201
        for (uint i = 0; i < n; i++) {
            mp_parse_node_print(pns->nodes[i], indent + 2);
Damien's avatar
Damien committed
202
203
204
        }
    }
}
205
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
206
207

/*
208
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
209
210
    printf("result stack, most recent first\n");
    for (int i = parser->result_stack_top - 1; i >= 0; i--) {
211
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
212
213
214
215
    }
}
*/

216
STATIC mp_parse_node_t pop_result(parser_t *parser) {
Damien's avatar
Damien committed
217
218
219
220
    assert(parser->result_stack_top > 0);
    return parser->result_stack[--parser->result_stack_top];
}

221
STATIC mp_parse_node_t peek_result(parser_t *parser, int pos) {
Damien's avatar
Damien committed
222
223
224
225
    assert(parser->result_stack_top > pos);
    return parser->result_stack[parser->result_stack_top - 1 - pos];
}

226
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
227
228
229
230
    if (parser->result_stack_top >= parser->result_stack_alloc) {
        parser->result_stack = m_renew(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc * 2);
        parser->result_stack_alloc *= 2;
    }
Damien's avatar
Damien committed
231
232
233
    parser->result_stack[parser->result_stack_top++] = pn;
}

234
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
235
236
237
    const mp_token_t *tok = mp_lexer_cur(lex);
    mp_parse_node_t pn;
    if (tok->kind == MP_TOKEN_NAME) {
238
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
239
    } else if (tok->kind == MP_TOKEN_NUMBER) {
Damien's avatar
Damien committed
240
241
        bool dec = false;
        bool small_int = true;
242
        machine_int_t int_val = 0;
Damien's avatar
Damien committed
243
244
        int len = tok->len;
        const char *str = tok->str;
245
246
        int base = 0;
        int i = mp_parse_num_base(str, len, &base);
247
        bool overflow = false;
Damien's avatar
Damien committed
248
        for (; i < len; i++) {
249
            machine_int_t old_val = int_val;
250
            if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
Damien's avatar
Damien committed
251
252
253
                int_val = base * int_val + str[i] - '0';
            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
                int_val = base * int_val + str[i] - 'a' + 10;
254
            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
Damien's avatar
Damien committed
255
                int_val = base * int_val + str[i] - 'A' + 10;
Damien's avatar
Damien committed
256
            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
Damien's avatar
Damien committed
257
258
259
260
261
262
                dec = true;
                break;
            } else {
                small_int = false;
                break;
            }
263
264
265
266
267
268
269
            if (int_val < old_val) {
                // If new value became less than previous, it's overflow
                overflow = true;
            } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
                // If signed number changed sign - it's overflow
                overflow = true;
            }
Damien's avatar
Damien committed
270
271
        }
        if (dec) {
272
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
273
        } else if (small_int && !overflow && MP_PARSE_FITS_SMALL_INT(int_val)) {
274
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
Damien's avatar
Damien committed
275
        } else {
276
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
Damien's avatar
Damien committed
277
        }
278
    } else if (tok->kind == MP_TOKEN_STRING) {
279
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn(tok->str, tok->len));
280
    } else if (tok->kind == MP_TOKEN_BYTES) {
281
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
Damien's avatar
Damien committed
282
    } else {
283
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
Damien's avatar
Damien committed
284
285
286
287
    }
    push_result_node(parser, pn);
}

288
STATIC void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) {
289
    mp_parse_node_struct_t *pn = parse_node_new_struct(src_line, rule->rule_id, num_args);
Damien's avatar
Damien committed
290
291
292
    for (int i = num_args; i > 0; i--) {
        pn->nodes[i - 1] = pop_result(parser);
    }
293
    push_result_node(parser, (mp_parse_node_t)pn);
Damien's avatar
Damien committed
294
295
}

296
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_parse_error_kind_t *parse_error_kind_out) {
297
298
299
300
301

    // allocate memory for the parser and its stacks

    parser_t *parser = m_new_obj(parser_t);

Damien's avatar
Damien committed
302
303
304
305
    parser->rule_stack_alloc = 64;
    parser->rule_stack_top = 0;
    parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);

306
    parser->result_stack_alloc = 64;
Damien's avatar
Damien committed
307
    parser->result_stack_top = 0;
308
    parser->result_stack = m_new(mp_parse_node_t, parser->result_stack_alloc);
Damien's avatar
Damien committed
309

310
311
    parser->lexer = lex;

312
    // work out the top-level rule to use, and push it on the stack
Damien's avatar
Damien committed
313
314
    int top_level_rule;
    switch (input_kind) {
315
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien George's avatar
Damien George committed
316
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien's avatar
Damien committed
317
318
        default: top_level_rule = RULE_file_input;
    }
319
    push_rule(parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0);
Damien's avatar
Damien committed
320

321
322
    // parse!

323
324
    uint n, i; // state for the current rule
    uint rule_src_line; // source line for the first token matched by the current rule
Damien's avatar
Damien committed
325
    bool backtrack = false;
326
    const rule_t *rule = NULL;
327
    mp_token_kind_t tok_kind;
Damien's avatar
Damien committed
328
329
330
331
332
333
334
335
336
    bool emit_rule;
    bool had_trailing_sep;

    for (;;) {
        next_rule:
        if (parser->rule_stack_top == 0) {
            break;
        }

337
        pop_rule(parser, &rule, &i, &rule_src_line);
Damien's avatar
Damien committed
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
        printf("depth=%d ", parser->rule_stack_top);
        for (int j = 0; j < parser->rule_stack_top; ++j) {
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                for (; i < n - 1; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
359
                            if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
360
                                push_result_token(parser, lex);
361
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
362
363
364
365
                                goto next_rule;
                            }
                            break;
                        case RULE_ARG_RULE:
366
367
                            push_rule(parser, rule_src_line, rule, i + 1); // save this or-rule
                            push_rule_from_arg(parser, rule->arg[i]); // push child of or-rule
Damien's avatar
Damien committed
368
369
370
371
372
373
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }
                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
374
                    if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
375
                        push_result_token(parser, lex);
376
                        mp_lexer_to_next(lex);
Damien's avatar
Damien committed
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
                    } else {
                        backtrack = true;
                        goto next_rule;
                    }
                } else {
                    push_rule_from_arg(parser, rule->arg[i]);
                }
                break;

            case RULE_ACT_AND:

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
393
                        push_result_node(parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
                            // need to match a token
                            tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
412
                            if (mp_lexer_is_kind(lex, tok_kind)) {
Damien's avatar
Damien committed
413
                                // matched token
414
                                if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
415
416
                                    push_result_token(parser, lex);
                                }
417
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
418
419
420
421
422
423
424
425
426
427
428
429
430
431
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
432
433
                            push_rule(parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(parser, rule->arg[i]); // push child of and-rule
Damien's avatar
Damien committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
                emit_rule = false;
                for (int x = 0; x < n; ++x) {
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                        tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
450
                        if (tok_kind >= MP_TOKEN_NAME) {
Damien's avatar
Damien committed
451
452
                            emit_rule = true;
                        }
453
                        if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

                // never emit these rules if they have only 1 argument
                // NOTE: can't put atom_paren here because we need it to distinguisg, for example, [a,b] from [(a,b)]
470
471
                // TODO possibly put varargslist_name, varargslist_equal here as well
                if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_name || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
Damien's avatar
Damien committed
472
473
474
475
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
476
                if (ADD_BLANK_NODE(rule->rule_id)) {
Damien's avatar
Damien committed
477
                    emit_rule = true;
478
                    push_result_node(parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
479
480
481
482
483
                    i += 1;
                }

                int num_not_nil = 0;
                for (int x = 0; x < i; ++x) {
484
                    if (peek_result(parser, x) != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
485
486
487
488
489
                        num_not_nil += 1;
                    }
                }
                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
                if (emit_rule) {
490
                    push_result_rule(parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
491
                } else if (num_not_nil == 0) {
492
                    push_result_rule(parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien's avatar
Damien committed
493
494
495
496
                    //result_stack_show(parser);
                    //assert(0);
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
497
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
Damien's avatar
Damien committed
498
                    for (int x = 0; x < i; ++x) {
499
500
                        mp_parse_node_t pn2 = pop_result(parser);
                        if (pn2 != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
501
502
503
504
505
                            pn = pn2;
                        }
                    }
                    push_result_node(parser, pn);
                } else {
506
                    push_result_rule(parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
                }
                break;

            case RULE_ACT_LIST:
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allowing trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
                        uint arg = rule->arg[i & 1 & n];
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
549
                                if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
550
551
552
553
554
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
                                        push_result_token(parser, lex);
                                    }
555
                                    mp_lexer_to_next(lex);
Damien's avatar
Damien committed
556
557
558
559
560
561
562
563
564
565
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
566
567
                                push_rule(parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(parser, arg); // push child of list-rule
Damien's avatar
Damien committed
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
                                goto next_rule;
                            default:
                                assert(0);
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
587
                        push_result_rule(parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
588
589
590
591
592
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
593
                    push_result_rule(parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
594
595
596
597
598
599
600
                }
                break;

            default:
                assert(0);
        }
    }
601
602

    // check we are at the end of the token stream
603
    if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
604
        goto syntax_error;
Damien's avatar
Damien committed
605
    }
606

Damien's avatar
Damien committed
607
608
    //printf("--------------\n");
    //result_stack_show(parser);
609
610
    //printf("rule stack alloc: %d\n", parser->rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser->result_stack_alloc);
Damien's avatar
Damien committed
611
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
612
613
614
615
616
617
618
619
620
621
622
623
624

    // get the root parse node that we created
    assert(parser->result_stack_top == 1);
    mp_parse_node_t result = parser->result_stack[0];

finished:
    // free the memory that we don't need anymore
    m_del(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc);
    m_del(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc);
    m_del_obj(parser_t, parser);

    // return the result
    return result;
Damien's avatar
Damien committed
625
626

syntax_error:
627
    if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
628
        *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
629
    } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
630
        *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
631
    } else {
632
        *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
Damien's avatar
Damien committed
633
#ifdef USE_RULE_NAME
634
        // debugging: print the rule name that failed and the token
635
636
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
637
        mp_token_show(mp_lexer_cur(lex));
638
#endif
639
#endif
640
    }
641
642
    result = MP_PARSE_NODE_NULL;
    goto finished;
Damien's avatar
Damien committed
643
}