parse.c 29.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <string.h>
Damien's avatar
Damien committed
32
33

#include "misc.h"
34
#include "mpconfig.h"
35
#include "qstr.h"
Damien's avatar
Damien committed
36
#include "lexer.h"
37
#include "parsenumbase.h"
Damien's avatar
Damien committed
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include "parse.h"

// Layout of a rule's `act` byte: the high nibble selects how the rule is
// matched (or / and / list) and the low nibble is a per-kind argument, which
// the parser reads as `n`.
#define RULE_ACT_KIND_MASK      (0xf0)
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Layout of each 16-bit rule argument: the top 4 bits give the argument kind
// and the low 12 bits hold the token kind or the rule id it refers to.
#define RULE_ARG_BLANK          (0x0000)
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

54
55
// True for the rules whose parse node gets one extra blank (NULL) node appended
// as its last child; mp_parse_node_free skips that last slot when recursing.
// Note that rule_id is evaluated several times.
#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond)

Damien's avatar
Damien committed
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of one grammar rule, generated from grammar.h.
typedef struct _rule_t {
    byte rule_id; // RULE_xxx identifier; also the index of this rule in rules[]
    byte act; // RULE_ACT_xxx kind in the high nibble, per-kind argument in the low nibble
#ifdef USE_RULE_NAME
    const char *rule_name; // textual rule name, only present for debugging
#endif
    uint16_t arg[]; // rule arguments, each a RULE_ARG_xxx kind OR-ed with a token kind or rule id
} rule_t;

// Identifiers for the grammar rules.  RULE_none is 0 and the remaining ids are
// generated, in order, from the DEF_RULE entries of grammar.h.
enum {
    RULE_none = 0,
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
#include "grammar.h"
#undef DEF_RULE
    RULE_maximum_number_of,
    RULE_string, // special node for non-interned string
};

// Shorthand used by grammar.h to spell out each rule.  These names exist only
// while the rule tables below are being generated and are removed afterwards.

// rule arguments: a kind tag OR-ed with the token kind or rule id
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_tok(t)              (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)

// rule actions: or/and carry the number of arguments, lists carry their variant
//   list          (1): item (sep item)*
//   one_or_more   (2): item item*
//   list_with_end (3): item (sep item)* [sep]
#define or(n)                   (RULE_ACT_OR | n)
#define and(n)                  (RULE_ACT_AND | n)
#define list                    (RULE_ACT_LIST | 1)
#define one_or_more             (RULE_ACT_LIST | 2)
#define list_with_end           (RULE_ACT_LIST | 3)

// emit one static rule_t per grammar rule, with its name when debugging
#if defined(USE_RULE_NAME)
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#endif
#include "grammar.h"
#undef DEF_RULE

#undef tok
#undef rule
#undef opt_tok
#undef opt_rule
#undef or
#undef and
#undef list
#undef one_or_more
#undef list_with_end

103
STATIC const rule_t *rules[] = {
Damien's avatar
Damien committed
104
    NULL,
105
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
Damien's avatar
Damien committed
106
107
108
109
110
#include "grammar.h"
#undef DEF_RULE
};

// One saved entry on the parser's rule stack: which rule to resume, where in
// that rule to resume, and the source line recorded when it was pushed.
typedef struct _rule_stack_t {
    unsigned int src_line : 24; // source line passed to push_rule
    unsigned int rule_id : 8;   // index into rules[]
    int32_t arg_i; // what should be the size and signedness?
} rule_stack_t;

typedef struct _parser_t {
117
118
    bool had_memory_error;

Damien's avatar
Damien committed
119
120
121
122
    uint rule_stack_alloc;
    uint rule_stack_top;
    rule_stack_t *rule_stack;

123
    uint result_stack_alloc;
Damien's avatar
Damien committed
124
    uint result_stack_top;
125
    mp_parse_node_t *result_stack;
126
127

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
128
129
} parser_t;

130
131
132
133
// Record that an allocation failed.  The push/pop helpers check this flag and
// become no-ops, and the main parse loop stops once it is set.
STATIC inline void memory_error(parser_t *parser) {
    parser->had_memory_error = true;
}

134
// Push (rule, arg_i, src_line) onto the rule stack, growing the stack by
// MICROPY_ALLOC_PARSE_RULE_INC entries when it is full.  Does nothing if a
// memory error was already recorded; records one if the stack cannot grow.
STATIC void push_rule(parser_t *parser, int src_line, const rule_t *rule, int arg_i) {
    if (parser->had_memory_error) {
        return;
    }

    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        // stack is full, try to extend it
        uint grown_alloc = parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC;
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, grown_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc = grown_alloc;
    }

    // fill in the next free slot
    rule_stack_t *slot = parser->rule_stack + parser->rule_stack_top;
    parser->rule_stack_top += 1;
    slot->src_line = src_line;
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;
}

153
// Push the rule referenced by a rule argument (kind RULE_ARG_RULE or
// RULE_ARG_OPT_RULE), starting at its first argument and tagged with the
// source line of the lexer's current token.
STATIC void push_rule_from_arg(parser_t *parser, uint arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);

    // low bits of the argument are the id of the child rule
    uint child_id = arg & RULE_ARG_ARG_MASK;
    assert(child_id < RULE_maximum_number_of);

    const rule_t *child = rules[child_id];
    push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, child, 0);
}

160
// Pop the top entry of the rule stack, returning its rule, resume position and
// source line through the out-parameters.  Must not be called after a memory
// error, and the stack must not be empty (the latter is not checked here).
STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *src_line) {
    assert(!parser->had_memory_error);

    const rule_stack_t *top = &parser->rule_stack[--parser->rule_stack_top];
    *rule = rules[top->rule_id];
    *arg_i = top->arg_i;
    *src_line = top->src_line;
}

168
// Build a leaf parse node by packing kind and arg into a single word: arg is
// shifted left by 1 for MP_PARSE_NODE_SMALL_INT and by 5 for every other kind,
// and kind is OR-ed into the low bits.
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
    int shift = (kind == MP_PARSE_NODE_SMALL_INT) ? 1 : 5;
    return (mp_parse_node_t)(kind | (arg << shift));
}

175
// Recursively free a parse tree.  Leaf nodes and NULL are encoded in the node
// word itself and need no freeing; only struct nodes own heap memory.
//
// A RULE_string node owns a separately allocated character buffer (nodes[0])
// of length nodes[1], created by push_result_string.  Previously such nodes
// returned early and both the buffer and the node itself were leaked; they are
// now released here.
// NOTE(review): this assumes no consumer of the tree keeps the nodes[0] pointer
// after the tree is freed - confirm against the compiler.
void mp_parse_node_free(mp_parse_node_t pn) {
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        return;
    }

    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
    uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
    uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);

    if (rule_id == RULE_string) {
        // the two slots are a data pointer and a length, not child nodes
        m_del(char, (char*)pns->nodes[0], (uint)pns->nodes[1]);
    } else {
        // rules with an extra blank node keep non-node data in their last
        // slot, so that slot must not be recursed into
        uint num_children = n;
        if (ADD_BLANK_NODE(rule_id)) {
            num_children--;
        }
        for (uint i = 0; i < num_children; i++) {
            mp_parse_node_free(pns->nodes[i]);
        }
    }

    // the node was allocated with room for all n slots
    m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
}

197
198
#if MICROPY_DEBUG_PRINTERS
// Debug helper: print the parse tree rooted at pn to stdout, one node per line.
// Struct nodes are prefixed with their source line in brackets, all other
// nodes with blank padding of the same width.  `indent` is the number of
// spaces printed before the node; children are printed with indent + 2.
void mp_parse_node_print(mp_parse_node_t pn, int indent) {
    // source-line column
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("       ");
    } else {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    }

    // indentation
    for (int remaining = indent; remaining > 0; remaining--) {
        printf(" ");
    }

    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
        return;
    }

    if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        machine_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", value);
        return;
    }

    if (MP_PARSE_NODE_IS_LEAF(pn)) {
        machine_uint_t leaf_arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID:
                printf("id(%s)\n", qstr_str(leaf_arg));
                break;
            case MP_PARSE_NODE_INTEGER:
                printf("int(%s)\n", qstr_str(leaf_arg));
                break;
            case MP_PARSE_NODE_DECIMAL:
                printf("dec(%s)\n", qstr_str(leaf_arg));
                break;
            case MP_PARSE_NODE_STRING:
                printf("str(%s)\n", qstr_str(leaf_arg));
                break;
            case MP_PARSE_NODE_BYTES:
                printf("bytes(%s)\n", qstr_str(leaf_arg));
                break;
            case MP_PARSE_NODE_TOKEN:
                printf("tok(" INT_FMT ")\n", leaf_arg);
                break;
            default:
                assert(0);
        }
        return;
    }

    // anything else must be a mp_parse_node_struct_t
    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;

    if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
        // non-interned string: nodes[0] is the data, nodes[1] the length
        printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        return;
    }

    uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
    printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
    printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
    for (uint child = 0; child < n; child++) {
        mp_parse_node_print(pns->nodes[child], indent + 2);
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
242
243

/*
244
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
245
246
    printf("result stack, most recent first\n");
    for (int i = parser->result_stack_top - 1; i >= 0; i--) {
247
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
248
249
250
251
    }
}
*/

252
// Remove and return the most recently pushed result node.  After a memory
// error the stack is left untouched and MP_PARSE_NODE_NULL is returned.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }

    assert(parser->result_stack_top > 0);
    parser->result_stack_top -= 1;
    return parser->result_stack[parser->result_stack_top];
}

260
// Return, without removing it, the result node `pos` entries below the top of
// the result stack (pos 0 is the most recent).  After a memory error
// MP_PARSE_NODE_NULL is returned instead.
STATIC mp_parse_node_t peek_result(parser_t *parser, int pos) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }

    assert(parser->result_stack_top > pos);
    uint index = parser->result_stack_top - 1 - pos;
    return parser->result_stack[index];
}

268
// Push a node onto the result stack, growing the stack by
// MICROPY_ALLOC_PARSE_RESULT_INC entries when it is full.  Does nothing if a
// memory error was already recorded; records one (and drops pn) if the stack
// cannot grow.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }

    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // stack is full, try to extend it
        uint grown_alloc = parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC;
        mp_parse_node_t *grown = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, grown_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack = grown;
        parser->result_stack_alloc = grown_alloc;
    }

    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}

284
285
286
287
288
289
290
291
292
293
294
295
296
297
// Push a RULE_string node holding a private copy of a non-interned string.
// The node has two slots: nodes[0] is a pointer to the copied characters (not
// NUL terminated) and nodes[1] is the length.
//
// All allocations use the *_maybe variants so that failure is reported through
// the parser's memory-error flag, like every other allocation in this file;
// the original used an unchecked m_new for the character buffer.  Memory is
// also released on the paths where the node ends up not being stored.
STATIC void push_result_string(parser_t *parser, int src_line, const char *str, uint len) {
    if (parser->had_memory_error) {
        // push_result_node would drop the node, so don't allocate it at all
        return;
    }

    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (pn == NULL) {
        memory_error(parser);
        return;
    }

    char *p = m_new_maybe(char, len);
    if (p == NULL && len != 0) {
        // a NULL result is only an error for a non-empty request
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
        memory_error(parser);
        return;
    }
    if (len != 0) {
        memcpy(p, str, len);
    }

    pn->source_line = src_line;
    pn->kind_num_nodes = RULE_string | (2 << 8);
    pn->nodes[0] = (machine_int_t)p;
    pn->nodes[1] = len;
    push_result_node(parser, (mp_parse_node_t)pn);

    if (parser->had_memory_error) {
        // the flag was clear on entry, so the push itself failed and the node
        // was not stored anywhere; release it
        m_del(char, p, len);
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
    }
}
298

299
// Convert the lexer's current token into a parse node and push it onto the
// result stack:
//   - names become MP_PARSE_NODE_ID leaves holding an interned qstr;
//   - numbers become a SMALL_INT leaf when they are plain integers that fit,
//     a DECIMAL leaf when they contain '.', 'e', 'E', 'j' or 'J', and an
//     INTEGER leaf (holding the literal's text as a qstr) otherwise;
//   - strings become a STRING leaf when short or already interned, else a
//     RULE_string struct node holding a copy of the data;
//   - bytes become a BYTES leaf;
//   - any other token becomes a TOKEN leaf holding the token kind.
//
// Integer overflow is detected *before* each multiply-and-add.  The original
// performed the signed arithmetic first (undefined behaviour on overflow) and
// then compared against the previous value, which misses wraps that land on a
// larger value of the same sign (e.g. 0x120000000 with a 32-bit word wraps to
// 0x20000000 and was accepted as a small int).
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
    const mp_token_t *tok = mp_lexer_cur(lex);
    mp_parse_node_t pn;
    if (tok->kind == MP_TOKEN_NAME) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
    } else if (tok->kind == MP_TOKEN_NUMBER) {
        bool dec = false;
        bool small_int = true;
        bool overflow = false;
        machine_int_t int_val = 0;
        // largest positive machine_int_t: all bits set except the sign bit
        const machine_int_t int_max = (machine_int_t)(~(machine_uint_t)WORD_MSBIT_HIGH);
        int len = tok->len;
        const char *str = tok->str;
        int base = 0;
        int i = mp_parse_num_base(str, len, &base);
        for (; i < len; i++) {
            int dig;
            if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
                dig = str[i] - '0';
            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
                dig = str[i] - 'a' + 10;
            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
                dig = str[i] - 'A' + 10;
            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
                dec = true;
                break;
            } else {
                small_int = false;
                break;
            }
            // Keep scanning after an overflow (a later '.', 'e' or 'j' still
            // makes this a decimal) but stop accumulating the value.
            // base is non-zero here: a digit matched, so dig < base.
            if (!overflow) {
                if (int_val > (int_max - dig) / base) {
                    // base * int_val + dig would not fit in machine_int_t
                    overflow = true;
                } else {
                    int_val = base * int_val + dig;
                }
            }
        }
        if (dec) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
        } else if (small_int && !overflow && MP_PARSE_FITS_SMALL_INT(int_val)) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
        } else {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
        }
    } else if (tok->kind == MP_TOKEN_STRING) {
        // Don't automatically intern all strings.  doc strings (which are usually large)
        // will be discarded by the compiler, and so we shouldn't intern them.
        qstr qst = MP_QSTR_NULL;
        if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
            // intern short strings
            qst = qstr_from_strn(tok->str, tok->len);
        } else {
            // check if this string is already interned
            qst = qstr_find_strn((const byte*)tok->str, tok->len);
        }
        if (qst != MP_QSTR_NULL) {
            // qstr exists, make a leaf node
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
        } else {
            // not interned, make a node holding a pointer to the string data
            push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
            return;
        }
    } else if (tok->kind == MP_TOKEN_BYTES) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
    } else {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
    }
    push_result_node(parser, pn);
}

370
// Build a struct node for `rule` out of the top num_args entries of the result
// stack and push it in their place.  The popped entries keep their original
// order: the deepest one becomes nodes[0].  Records a memory error (leaving
// the result stack untouched) if the node cannot be allocated.
STATIC void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
    if (node == NULL) {
        memory_error(parser);
        return;
    }

    node->source_line = src_line;
    // low 8 bits: rule id; remaining bits: number of child slots
    node->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);

    // the top of the stack is the last child, so fill the slots back to front
    for (int slot = num_args - 1; slot >= 0; slot--) {
        node->nodes[slot] = pop_result(parser);
    }

    push_result_node(parser, (mp_parse_node_t)node);
}

384
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_parse_error_kind_t *parse_error_kind_out) {
385

386
    // initialise parser and allocate memory for its stacks
387

388
    parser_t parser;
389

390
    parser.had_memory_error = false;
391

392
    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
393
394
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien's avatar
Damien committed
395

396
    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
397
398
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien's avatar
Damien committed
399

400
401
402
403
404
405
    parser.lexer = lex;

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        goto memory_error;
    }
406

407
    // work out the top-level rule to use, and push it on the stack
Damien's avatar
Damien committed
408
409
    int top_level_rule;
    switch (input_kind) {
410
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien George's avatar
Damien George committed
411
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien's avatar
Damien committed
412
413
        default: top_level_rule = RULE_file_input;
    }
414
    push_rule(&parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0);
Damien's avatar
Damien committed
415

416
417
    // parse!

418
419
    uint n, i; // state for the current rule
    uint rule_src_line; // source line for the first token matched by the current rule
Damien's avatar
Damien committed
420
    bool backtrack = false;
421
    const rule_t *rule = NULL;
422
    mp_token_kind_t tok_kind;
Damien's avatar
Damien committed
423
424
425
426
427
    bool emit_rule;
    bool had_trailing_sep;

    for (;;) {
        next_rule:
428
        if (parser.rule_stack_top == 0 || parser.had_memory_error) {
Damien's avatar
Damien committed
429
430
431
            break;
        }

432
        pop_rule(&parser, &rule, &i, &rule_src_line);
Damien's avatar
Damien committed
433
434
435
436
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
437
438
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien's avatar
Damien committed
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                for (; i < n - 1; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
454
                            if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
455
                                push_result_token(&parser, lex);
456
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
457
458
459
460
                                goto next_rule;
                            }
                            break;
                        case RULE_ARG_RULE:
461
462
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien's avatar
Damien committed
463
464
465
466
467
468
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }
                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
469
                    if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
470
                        push_result_token(&parser, lex);
471
                        mp_lexer_to_next(lex);
Damien's avatar
Damien committed
472
473
474
475
476
                    } else {
                        backtrack = true;
                        goto next_rule;
                    }
                } else {
477
                    push_rule_from_arg(&parser, rule->arg[i]);
Damien's avatar
Damien committed
478
479
480
481
482
483
484
485
486
487
                }
                break;

            case RULE_ACT_AND:

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
488
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
                            // need to match a token
                            tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
507
                            if (mp_lexer_is_kind(lex, tok_kind)) {
Damien's avatar
Damien committed
508
                                // matched token
509
                                if (tok_kind == MP_TOKEN_NAME) {
510
                                    push_result_token(&parser, lex);
Damien's avatar
Damien committed
511
                                }
512
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
513
514
515
516
517
518
519
520
521
522
523
524
525
526
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
527
528
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien's avatar
Damien committed
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
                emit_rule = false;
                for (int x = 0; x < n; ++x) {
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                        tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
545
                        if (tok_kind >= MP_TOKEN_NAME) {
Damien's avatar
Damien committed
546
547
                            emit_rule = true;
                        }
548
                        if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
549
550
551
552
553
554
555
556
557
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

558
559
#if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
560
561
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
562
563
564
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
                        pop_result(&parser);
                        pop_result(&parser);
565
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
566
567
568
569
570
                        break;
                    }
                }
#endif

Damien's avatar
Damien committed
571
572
573
574
575
576
577
                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

                // never emit these rules if they have only 1 argument
                // NOTE: can't put atom_paren here because we need it to distinguish, for example, [a,b] from [(a,b)]
578
579
                // TODO possibly put varargslist_name, varargslist_equal here as well
                if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_name || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
Damien's avatar
Damien committed
580
581
582
583
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
584
                if (ADD_BLANK_NODE(rule->rule_id)) {
Damien's avatar
Damien committed
585
                    emit_rule = true;
586
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
587
588
589
590
591
                    i += 1;
                }

                int num_not_nil = 0;
                for (int x = 0; x < i; ++x) {
592
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
593
594
595
596
597
                        num_not_nil += 1;
                    }
                }
                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
                if (emit_rule) {
598
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
599
                } else if (num_not_nil == 0) {
600
                    push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien's avatar
Damien committed
601
602
603
604
                    //result_stack_show(parser);
                    //assert(0);
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
605
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
Damien's avatar
Damien committed
606
                    for (int x = 0; x < i; ++x) {
607
                        mp_parse_node_t pn2 = pop_result(&parser);
608
                        if (pn2 != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
609
610
611
                            pn = pn2;
                        }
                    }
612
                    push_result_node(&parser, pn);
Damien's avatar
Damien committed
613
                } else {
614
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
                }
                break;

            case RULE_ACT_LIST:
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow a trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
                        uint arg = rule->arg[i & 1 & n];
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
657
                                if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
658
659
660
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
661
                                        push_result_token(&parser, lex);
Damien's avatar
Damien committed
662
                                    }
663
                                    mp_lexer_to_next(lex);
Damien's avatar
Damien committed
664
665
666
667
668
669
670
671
672
673
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
674
675
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
Damien's avatar
Damien committed
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
                                goto next_rule;
                            default:
                                assert(0);
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
695
                        push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
696
697
698
699
700
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
701
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
702
703
704
705
706
707
708
                }
                break;

            default:
                assert(0);
        }
    }
709

710
711
712
    mp_parse_node_t result;

    // check if we had a memory error
713
714
    if (parser.had_memory_error) {
memory_error:
715
716
717
718
719
720
        *parse_error_kind_out = MP_PARSE_ERROR_MEMORY;
        result = MP_PARSE_NODE_NULL;
        goto finished;

    }

721
    // check we are at the end of the token stream
722
    if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
723
        goto syntax_error;
Damien's avatar
Damien committed
724
    }
725

Damien's avatar
Damien committed
726
727
    //printf("--------------\n");
    //result_stack_show(parser);
728
729
    //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser.result_stack_alloc);
Damien's avatar
Damien committed
730
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
731
732

    // get the root parse node that we created
733
734
    assert(parser.result_stack_top == 1);
    result = parser.result_stack[0];
735
736
737

finished:
    // free the memory that we don't need anymore
738
739
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
740
741
742

    // return the result
    return result;
Damien's avatar
Damien committed
743
744

syntax_error:
745
    if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
746
        *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
747
    } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
748
        *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
749
    } else {
750
        *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
Damien's avatar
Damien committed
751
#ifdef USE_RULE_NAME
752
        // debugging: print the rule name that failed and the token
753
754
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
755
        mp_token_show(mp_lexer_cur(lex));
756
#endif
757
#endif
758
    }
759
760
    result = MP_PARSE_NODE_NULL;
    goto finished;
Damien's avatar
Damien committed
761
}