parse.c 29.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <memory.h>
Damien's avatar
Damien committed
32
33

#include "misc.h"
34
#include "mpconfig.h"
35
#include "qstr.h"
Damien's avatar
Damien committed
36
#include "lexer.h"
37
#include "parsenumbase.h"
Damien's avatar
Damien committed
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include "parse.h"

// Encoding of rule_t.act: the high nibble selects the kind of rule (or / and /
// list) and the low nibble carries a small argument (read as "n" by mp_parse;
// for list rules it is the list variant 1, 2 or 3).
#define RULE_ACT_KIND_MASK      (0xf0)
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Encoding of each entry of rule_t.arg[]: the top 4 bits select the kind of
// argument and the low 12 bits hold either a token kind or a rule id.
#define RULE_ARG_BLANK          (0x0000)
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

54
55
// True for the rules whose parse node gets one extra trailing blank
// (MP_PARSE_NODE_NULL) node appended by mp_parse; mp_parse_node_free skips
// that last slot when recursing into children.
#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond)

Damien's avatar
Damien committed
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of one grammar rule, instantiated once per DEF_RULE
// entry of grammar.h.
typedef struct _rule_t {
    byte rule_id;           // RULE_xxx identifier of this rule
    byte act;               // RULE_ACT_xxx kind in the high nibble, argument in the low nibble
#ifdef USE_RULE_NAME
    const char *rule_name;  // textual name of the rule, only present for debugging
#endif
    uint16_t arg[];         // RULE_ARG_xxx encoded arguments (tokens / sub-rules)
} rule_t;

// Rule identifiers.  One RULE_xxx constant is generated for every DEF_RULE
// entry of grammar.h; RULE_none (0) matches the NULL entry at the start of
// the rules[] table.
enum {
    RULE_none = 0,
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
#include "grammar.h"
#undef DEF_RULE
    // number of ids that index rules[] (used to range-check rule ids)
    RULE_maximum_number_of,
    // kind used for string parse nodes built by push_string; it has no entry in rules[]
    RULE_string,
};

// Small DSL used by grammar.h to describe rules.  The helpers below build the
// "act" byte (or / and / list variants) and the RULE_ARG_xxx encoded entries of
// rule_t.arg[].  They are only defined while grammar.h is being included and
// are removed again afterwards so they cannot leak into the rest of the file.
#define or(n)                   (RULE_ACT_OR | (n))
#define and(n)                  (RULE_ACT_AND | (n))
#define one_or_more             (RULE_ACT_LIST | 2)
#define list                    (RULE_ACT_LIST | 1)
#define list_with_end           (RULE_ACT_LIST | 3)
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_tok(t)              (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
// instantiate one static const rule_t per grammar rule
#ifdef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#endif
#include "grammar.h"
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_tok
#undef opt_rule
#undef one_or_more
#undef DEF_RULE

103
// Table mapping a RULE_xxx id to its static rule description.  Entry 0
// (RULE_none) is NULL; the remaining entries follow the order of grammar.h.
// The table is never modified, so the pointer array itself is const as well.
STATIC const rule_t *const rules[] = {
    NULL,
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
#include "grammar.h"
#undef DEF_RULE
};

// One saved entry of the rule stack: which rule to resume, at which argument
// index, and the source line recorded when the rule was pushed.
typedef struct _rule_stack_t {
    unsigned int src_line : 24; // source line, truncated to 24 bits
    unsigned int rule_id : 8;   // index into rules[]
    int32_t arg_i; // what should be the size and signedness?
} rule_stack_t;

typedef struct _parser_t {
117
118
    bool had_memory_error;

Damien's avatar
Damien committed
119
120
121
122
    uint rule_stack_alloc;
    uint rule_stack_top;
    rule_stack_t *rule_stack;

123
    uint result_stack_alloc;
Damien's avatar
Damien committed
124
    uint result_stack_top;
125
    mp_parse_node_t *result_stack;
126
127

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
128
129
} parser_t;

130
131
132
133
// Record that an allocation failed.  The flag is sticky: the push/pop helpers
// check it and become no-ops, and mp_parse stops its main loop when it is set.
STATIC inline void memory_error(parser_t *parser) {
    parser->had_memory_error = true;
}

134
// Push a rule onto the rule stack so that it is (re)visited later.
//   src_line: source line to associate with the rule (stored in 24 bits)
//   rule:     rule to resume; only its rule_id is stored
//   arg_i:    index of the argument at which to resume the rule
// The stack grows by MICROPY_ALLOC_PARSE_RULE_INC entries when full.  If the
// parser already had a memory error, or growing fails, nothing is pushed (and
// in the latter case the memory error is recorded).
STATIC void push_rule(parser_t *parser, int src_line, const rule_t *rule, int arg_i) {
    if (parser->had_memory_error) {
        return;
    }
    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
        if (rs == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack = rs;
        parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
    }
    rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
    rs->src_line = src_line;
    rs->rule_id = rule->rule_id;
    rs->arg_i = arg_i;
}

153
// Push the rule referenced by a RULE_ARG_RULE / RULE_ARG_OPT_RULE argument,
// starting at its first argument and tagged with the source line of the
// lexer's current token.
STATIC void push_rule_from_arg(parser_t *parser, uint arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
    uint rule_id = arg & RULE_ARG_ARG_MASK;
    assert(rule_id < RULE_maximum_number_of);
    push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0);
}

160
// Pop the most recently pushed rule from the rule stack.
//   rule:     receives the rule description (looked up in rules[])
//   arg_i:    receives the argument index at which to resume
//   src_line: receives the source line stored with the entry
// Must not be called after a memory error or on an empty stack.
STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *src_line) {
    assert(!parser->had_memory_error);
    // rule_stack_top is unsigned; popping an empty stack would wrap around
    assert(parser->rule_stack_top > 0);
    parser->rule_stack_top -= 1;
    *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
    *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
    *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
}

168
// Build a leaf parse node by packing "arg" above the tag bits in "kind".
// Small integers are shifted by 1 bit, all other leaf kinds by 5 bits.
// The shift is done on the unsigned type because left-shifting a negative
// signed value is undefined behaviour in C; the resulting bit pattern is the
// one a two's complement signed shift would produce.
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
    if (kind == MP_PARSE_NODE_SMALL_INT) {
        return (mp_parse_node_t)((machine_uint_t)kind | ((machine_uint_t)arg << 1));
    }
    return (mp_parse_node_t)((machine_uint_t)kind | ((machine_uint_t)arg << 5));
}

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Recursively free a parse node and everything below it.
// Returns the number of struct nodes that were released; leaf nodes are not
// heap objects and contribute 0.
uint mp_parse_node_free(mp_parse_node_t pn) {
    uint cnt = 0;
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
        if (rule_id == RULE_string) {
            // string nodes (see push_string) hold a raw char buffer in nodes[0]
            // and its length in nodes[1]; neither is a parse node, so release
            // the buffer directly instead of recursing into them
            m_del(char, (char*)pns->nodes[0], (machine_uint_t)pns->nodes[1]);
        } else {
            // the trailing blank node of these rules is not a child parse node
            // (mp_parse adds it for the compiler to store data), so skip it
            bool adjust = ADD_BLANK_NODE(rule_id);
            uint num_children = adjust ? n - 1 : n;
            for (uint i = 0; i < num_children; i++) {
                cnt += mp_parse_node_free(pns->nodes[i]);
            }
        }
        // n still counts every slot, which is the size the struct was allocated with
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
        cnt++;
    }
    return cnt;
}

197
198
#if MICROPY_DEBUG_PRINTERS
// Debug helper: print the parse tree rooted at pn to stdout, one node per
// line.  Struct nodes are prefixed with their source line and children are
// indented by 2 extra spaces per level.
//   indent: number of spaces to print before the node description
void mp_parse_node_print(mp_parse_node_t pn, int indent) {
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }
    for (int i = 0; i < indent; i++) {
        printf(" ");
    }
    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
    } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
    } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
        machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", arg); break;
            default: assert(0); break;
        }
    } else if (MP_PARSE_NODE_IS_STRING(pn)) {
        // string node: nodes[0] is the character data, nodes[1] its length
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
    } else {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        // the kind and n are unsigned, so print them with %u
#ifdef USE_RULE_NAME
        printf("%s(%u) (n=%u)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
        printf("rule(%u) (n=%u)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
        for (uint i = 0; i < n; i++) {
            mp_parse_node_print(pns->nodes[i], indent + 2);
        }
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
240
241

/*
242
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
243
244
    printf("result stack, most recent first\n");
    for (int i = parser->result_stack_top - 1; i >= 0; i--) {
245
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
246
247
248
249
    }
}
*/

250
// Pop and return the top node of the result stack.
// After a memory error the stack is left untouched and MP_PARSE_NODE_NULL is
// returned instead.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }
    assert(parser->result_stack_top > 0);
    return parser->result_stack[--parser->result_stack_top];
}

258
// Return, without removing it, the node "pos" entries below the top of the
// result stack (pos == 0 is the top).  After a memory error
// MP_PARSE_NODE_NULL is returned instead.
STATIC mp_parse_node_t peek_result(parser_t *parser, int pos) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }
    // pos is compared against an unsigned count below, so reject negatives explicitly
    assert(pos >= 0);
    assert(parser->result_stack_top > pos);
    return parser->result_stack[parser->result_stack_top - 1 - pos];
}

266
// Push a parse node onto the result stack, growing the stack by
// MICROPY_ALLOC_PARSE_RESULT_INC entries when it is full.  If the parser
// already had a memory error, or growing fails, the node is not pushed (and
// in the latter case the memory error is recorded).
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }
    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // named differently from the "pn" parameter so that it is not shadowed
        mp_parse_node_t *new_stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
        if (new_stack == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack = new_stack;
        parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
    }
    parser->result_stack[parser->result_stack_top++] = pn;
}

282
283
// forward declaration: push_result_token calls push_string, which is defined after it
STATIC void push_string(parser_t *parser, int src_line, const char *str, uint len);

284
// Convert the lexer's current token into a parse node and push it onto the
// result stack:
//   NAME   -> MP_PARSE_NODE_ID leaf holding the interned name
//   NUMBER -> MP_PARSE_NODE_SMALL_INT leaf if the literal is an integer that
//             fits, MP_PARSE_NODE_DECIMAL leaf if it contains '.', 'e', 'E',
//             'j' or 'J', otherwise MP_PARSE_NODE_INTEGER leaf; the last two
//             hold the interned source text
//   STRING -> string struct node built by push_string
//   BYTES  -> MP_PARSE_NODE_BYTES leaf holding the interned data
//   other  -> MP_PARSE_NODE_TOKEN leaf holding the token kind
// The lexer is not advanced.
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
    const mp_token_t *tok = mp_lexer_cur(lex);
    mp_parse_node_t pn;
    if (tok->kind == MP_TOKEN_NAME) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
    } else if (tok->kind == MP_TOKEN_NUMBER) {
        bool dec = false;
        bool small_int = true;
        machine_int_t int_val = 0;
        int len = tok->len;
        const char *str = tok->str;
        int base = 0;
        int i = mp_parse_num_base(str, len, &base);
        bool overflow = false;
        // largest positive machine_int_t: all bits set below the sign bit
        const machine_int_t int_max = (machine_int_t)((machine_uint_t)WORD_MSBIT_HIGH - 1);
        for (; i < len; i++) {
            int dig;
            if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
                dig = str[i] - '0';
            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
                dig = str[i] - 'a' + 10;
            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
                dig = str[i] - 'A' + 10;
            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
                dec = true;
                break;
            } else {
                small_int = false;
                break;
            }
            // Check for overflow *before* accumulating: signed overflow is
            // undefined behaviour, and testing the wrapped result afterwards
            // can miss it (a product can wrap past the old value without
            // changing sign).  A digit is only accepted above when base > dig,
            // so base is at least 1 here.
            if (!overflow) {
                if (int_val > (int_max - dig) / base) {
                    overflow = true;
                } else {
                    int_val = base * int_val + dig;
                }
            }
        }
        if (dec) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
        } else if (small_int && !overflow && MP_PARSE_FITS_SMALL_INT(int_val)) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
        } else {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
        }
    } else if (tok->kind == MP_TOKEN_STRING) {
        // strings are stored as a struct node rather than an interned leaf;
        // push_string pushes the node itself
        push_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
        return;
    } else if (tok->kind == MP_TOKEN_BYTES) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
    } else {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
    }
    push_result_node(parser, pn);
}

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
// Build a string parse node and push it onto the result stack.
// The node is a struct of kind RULE_string with two slots: nodes[0] holds a
// pointer to a private copy of the "len" bytes at "str" (not NUL terminated)
// and nodes[1] holds len.
// Allocation failures are reported through memory_error(), like the other
// allocations in this file, and nothing is pushed in that case.
STATIC void push_string(parser_t *parser, int src_line, const char *str, uint len) {
    if (parser->had_memory_error) {
        // push_result_node would drop the node, so don't allocate it at all
        return;
    }
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (pn == NULL) {
        memory_error(parser);
        return;
    }
    // NOTE(review): m_new presumably does not return on failure, which would
    // bypass the parser's memory_error() path - confirm against the allocator
    char *p = m_new_maybe(char, len);
    if (p == NULL && len > 0) {
        // a NULL result is only treated as a failure for a non-empty string
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
        memory_error(parser);
        return;
    }
    if (len > 0) {
        memcpy(p, str, len);
    }
    pn->source_line = src_line;
    pn->kind_num_nodes = RULE_string | (2 << 8);
    pn->nodes[0] = (machine_int_t)p;
    pn->nodes[1] = len;
    push_result_node(parser, (mp_parse_node_t)pn);
}

356
// Build a struct parse node for "rule" from the top num_args entries of the
// result stack and push it in their place.
//   src_line: source line stored in the new node
//   num_args: number of child nodes; they are popped so that the deepest of
//             them becomes nodes[0] (i.e. children keep their push order)
// On allocation failure the memory error is recorded and the result stack is
// left unchanged.
STATIC void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) {
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
    if (pn == NULL) {
        memory_error(parser);
        return;
    }
    pn->source_line = src_line;
    // low 8 bits: rule id, remaining bits: number of child nodes
    pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
    for (int i = num_args; i > 0; i--) {
        pn->nodes[i - 1] = pop_result(parser);
    }
    push_result_node(parser, (mp_parse_node_t)pn);
}

370
// Parse the token stream supplied by "lex" and return the root parse node.
//   input_kind:           selects the top-level grammar rule (single input,
//                         eval input, or file input for any other value)
//   parse_error_kind_out: written only on failure, with MP_PARSE_ERROR_MEMORY,
//                         MP_PARSE_ERROR_UNEXPECTED_INDENT,
//                         MP_PARSE_ERROR_UNMATCHED_UNINDENT or
//                         MP_PARSE_ERROR_INVALID_SYNTAX
// Returns MP_PARSE_NODE_NULL on failure.
//
// The parser is table driven and non-recursive: pending rules live on an
// explicit rule stack and finished nodes on a result stack.  "backtrack" is
// set when a rule fails to match and tells the rule popped next that its
// child failed.
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_parse_error_kind_t *parse_error_kind_out) {

    // initialise parser and allocate memory for its stacks

    parser_t parser;

    parser.had_memory_error = false;

    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);

    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);

    parser.lexer = lex;

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        goto memory_error;
    }

    // work out the top-level rule to use, and push it on the stack
    int top_level_rule;
    switch (input_kind) {
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
        default: top_level_rule = RULE_file_input;
    }
    push_rule(&parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0);

    // parse!

    uint n, i; // state for the current rule
    uint rule_src_line; // source line for the first token matched by the current rule
    bool backtrack = false;
    const rule_t *rule = NULL;
    mp_token_kind_t tok_kind;
    bool emit_rule;
    bool had_trailing_sep;

    for (;;) {
        next_rule:
        if (parser.rule_stack_top == 0 || parser.had_memory_error) {
            break;
        }

        pop_rule(&parser, &rule, &i, &rule_src_line);
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    // a previously tried alternative matched, so this or-rule is done
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                // try all alternatives except the last one
                for (; i < n - 1; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
                            if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
                                push_result_token(&parser, lex);
                                mp_lexer_to_next(lex);
                                goto next_rule;
                            }
                            break;
                        case RULE_ARG_RULE:
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }
                // last alternative: there is nothing left to fall back on, so
                // the or-rule itself is not saved on the rule stack
                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
                        push_result_token(&parser, lex);
                        mp_lexer_to_next(lex);
                    } else {
                        backtrack = true;
                        goto next_rule;
                    }
                } else {
                    push_rule_from_arg(&parser, rule->arg[i]);
                }
                break;

            case RULE_ACT_AND:

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
                            // need to match a token
                            tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
                            if (mp_lexer_is_kind(lex, tok_kind)) {
                                // matched token
                                if (tok_kind == MP_TOKEN_NAME) {
                                    push_result_token(&parser, lex);
                                }
                                mp_lexer_to_next(lex);
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
                emit_rule = false;
                for (int x = 0; x < n; ++x) {
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                        tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
                        if (tok_kind >= MP_TOKEN_NAME) {
                            emit_rule = true;
                        }
                        if (tok_kind == MP_TOKEN_NAME) {
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

#if 1 && !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statement, such as doc strings
                // problem is that doc strings have already been interned, so this doesn't really help reduce RAM usage
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRING(p)) {
                        // "parser" is a local struct here, so its address must be passed
                        // NOTE(review): a discarded string node is dropped without being freed
                        pop_result(&parser);
                        pop_result(&parser);
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
                        break;
                    }
                }
#endif

                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

                // never emit these rules if they have only 1 argument
                // NOTE: can't put atom_paren here because we need it to distinguish, for example, [a,b] from [(a,b)]
                // TODO possibly put varargslist_name, varargslist_equal here as well
                if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_name || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
                if (ADD_BLANK_NODE(rule->rule_id)) {
                    emit_rule = true;
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
                    i += 1;
                }

                int num_not_nil = 0;
                for (int x = 0; x < i; ++x) {
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
                        num_not_nil += 1;
                    }
                }
                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
                if (emit_rule) {
                    push_result_rule(&parser, rule_src_line, rule, i);
                } else if (num_not_nil == 0) {
                    push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
                    //result_stack_show(parser);
                    //assert(0);
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
                    for (int x = 0; x < i; ++x) {
                        mp_parse_node_t pn2 = pop_result(&parser);
                        if (pn2 != MP_PARSE_NODE_NULL) {
                            pn = pn2;
                        }
                    }
                    push_result_node(&parser, pn);
                } else {
                    push_result_rule(&parser, rule_src_line, rule, i);
                }
                break;

            case RULE_ACT_LIST:
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    // this loop is only ever left through a goto (next_rule or
                    // list_backtrack); the "break" below leaves the switch only
                    for (;;) {
                        uint arg = rule->arg[i & 1 & n];
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
                                if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
                                        push_result_token(&parser, lex);
                                    }
                                    mp_lexer_to_next(lex);
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
                                goto next_rule;
                            default:
                                assert(0);
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
                        push_result_rule(&parser, rule_src_line, rule, i);
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
                    push_result_rule(&parser, rule_src_line, rule, i);
                }
                break;

            default:
                assert(0);
        }
    }

    mp_parse_node_t result;

    // check if we had a memory error
    if (parser.had_memory_error) {
memory_error:
        *parse_error_kind_out = MP_PARSE_ERROR_MEMORY;
        result = MP_PARSE_NODE_NULL;
        goto finished;

    }

    // check we are at the end of the token stream
    if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
        goto syntax_error;
    }

    //printf("--------------\n");
    //result_stack_show(parser);
    //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser.result_stack_alloc);
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);

    // get the root parse node that we created
    assert(parser.result_stack_top == 1);
    result = parser.result_stack[0];

finished:
    // free the memory that we don't need anymore
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);

    // return the result
    return result;

syntax_error:
    if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
        *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
    } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
        *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
    } else {
        *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
#ifdef USE_RULE_NAME
        // debugging: print the rule name that failed and the token
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
        mp_token_show(mp_lexer_cur(lex));
#endif
#endif
    }
    result = MP_PARSE_NODE_NULL;
    goto finished;
}