parse.c 30.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <string.h>
Damien's avatar
Damien committed
32

33
34
35
36
#include "py/lexer.h"
#include "py/parse.h"
#include "py/parsenum.h"
#include "py/smallint.h"
Damien's avatar
Damien committed
37
38

// Layout of rule_t.act:
//   RULE_ACT_ARG_MASK   - low 4 bits, read by the parser as the rule's argument count
//   RULE_ACT_KIND_MASK  - selects one of RULE_ACT_OR / RULE_ACT_AND / RULE_ACT_LIST
//   RULE_ACT_ALLOW_IDENT, RULE_ACT_ADD_BLANK - independent flag bits
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_KIND_MASK      (0x30)
#define RULE_ACT_ALLOW_IDENT    (0x40)
#define RULE_ACT_ADD_BLANK      (0x80)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Layout of each entry of rule_t.arg[]:
//   RULE_ARG_KIND_MASK  - top 4 bits, one of the RULE_ARG_* kinds below
//   RULE_ARG_ARG_MASK   - low 12 bits, a token kind or a rule id depending on the kind
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

// True if the rule (a pointer to rule_t) has the RULE_ACT_ADD_BLANK flag set.
// The argument is parenthesised so that any pointer expression can be passed.
#define ADD_BLANK_NODE(rule) ((((rule)->act) & RULE_ACT_ADD_BLANK) != 0)
54

Damien's avatar
Damien committed
55
56
57
58
59
60
61
62
63
64
65
66
67
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of one grammar rule.
typedef struct _rule_t {
    // identifier of this rule (a RULE_xxx value); also its index in the rules[] table
    byte rule_id;
    // action byte: RULE_ACT_* kind and flag bits, with the argument count in the
    // bits covered by RULE_ACT_ARG_MASK
    byte act;
#ifdef USE_RULE_NAME
    // textual rule name, only present for debugging builds
    const char *rule_name;
#endif
    // the rule's arguments; each entry is a RULE_ARG_* kind combined with a
    // token kind or rule id (see RULE_ARG_KIND_MASK / RULE_ARG_ARG_MASK)
    uint16_t arg[];
} rule_t;

// Rule identifiers.  One RULE_<name> constant is generated for every DEF_RULE
// entry of py/grammar.h, in the order the entries appear there.
enum {
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
#include "py/grammar.h"
#undef DEF_RULE
    // one past the last grammar rule; ids below this value are valid rules[] indices
    RULE_maximum_number_of,
    // the following ids have no entry in rules[]
    RULE_string, // special node for non-interned string
    RULE_bytes, // special node for non-interned bytes
};

76
77
// Shorthand used by the DEF_RULE entries of py/grammar.h.  The first group
// builds the value stored in rule_t.act, the second group builds entries of
// rule_t.arg[].
// NOTE(review): ident and blank are not #undef'd below, unlike the other
// shorthand macros, so they remain defined for the rest of this file - confirm
// whether that is intentional.
#define ident                   (RULE_ACT_ALLOW_IDENT)
#define blank                   (RULE_ACT_ADD_BLANK)
#define or(n)                   (RULE_ACT_OR | (n))
#define and(n)                  (RULE_ACT_AND | (n))
#define one_or_more             (RULE_ACT_LIST | 2)
#define list                    (RULE_ACT_LIST | 1)
#define list_with_end           (RULE_ACT_LIST | 3)
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_tok(t)              (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)

// Emit one static const rule_t object, named rule_<name>, per grammar entry.
// The rule name string is stored only when USE_RULE_NAME is defined.
#ifdef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#endif
#include "py/grammar.h"

// the shorthand names are common identifiers, so remove them again
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_tok
#undef opt_rule
#undef one_or_more
#undef DEF_RULE

104
// Table of all grammar rules, indexed by rule id (RULE_xxx).  Entries are
// generated from py/grammar.h in the same order as the rule-id enum.
// Both the rule objects and the table itself are const: the table is only
// ever read, so it does not need to live in writable memory.
STATIC const rule_t *const rules[] = {
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
#include "py/grammar.h"
#undef DEF_RULE
};

typedef struct _rule_stack_t {
111
112
113
    mp_uint_t src_line : 24;
    mp_uint_t rule_id : 8;
    mp_uint_t arg_i : 32; // what should the bit-size be?
Damien's avatar
Damien committed
114
115
116
} rule_stack_t;

typedef struct _parser_t {
117
118
    bool had_memory_error;

119
120
    mp_uint_t rule_stack_alloc;
    mp_uint_t rule_stack_top;
Damien's avatar
Damien committed
121
122
    rule_stack_t *rule_stack;

123
124
    mp_uint_t result_stack_alloc;
    mp_uint_t result_stack_top;
125
    mp_parse_node_t *result_stack;
126
127

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
128
129
} parser_t;

130
131
132
133
// Record that an allocation failed during parsing.  The flag is checked by the
// stack helpers and by the main parse loop, which stop doing work once it is set.
STATIC inline void memory_error(parser_t *parser) {
    parser->had_memory_error = true;
}

134
// Push a rule onto the rule stack so that it is (re)started at argument arg_i.
// src_line is stored with the entry and handed back by pop_rule.
// Does nothing if a memory error has already been recorded; records a memory
// error (and pushes nothing) if the stack cannot be grown.
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
    if (parser->had_memory_error) {
        return;
    }

    // stack full: grow it by a fixed increment
    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        mp_uint_t new_alloc = parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC;
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, new_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc = new_alloc;
    }

    // fill in the new top-of-stack entry
    rule_stack_t *slot = &parser->rule_stack[parser->rule_stack_top];
    parser->rule_stack_top += 1;
    slot->src_line = src_line;
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;
}

153
// Push the rule referenced by a rule argument (an entry of rule_t.arg[] of kind
// RULE_ARG_RULE or RULE_ARG_OPT_RULE), starting at its first argument and
// tagged with the lexer's current token line.
STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
    mp_uint_t arg_kind = arg & RULE_ARG_KIND_MASK;
    assert(arg_kind == RULE_ARG_RULE || arg_kind == RULE_ARG_OPT_RULE);
    (void)arg_kind; // only used by the assert

    mp_uint_t target_id = arg & RULE_ARG_ARG_MASK;
    assert(target_id < RULE_maximum_number_of);

    push_rule(parser, parser->lexer->tok_line, rules[target_id], 0);
}

160
// Pop the top entry of the rule stack, returning through the out-parameters
// the rule, the argument index to resume at and the stored source line.
// Must not be called after a memory error; the caller is responsible for
// making sure the stack is not empty.
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
    assert(!parser->had_memory_error);

    parser->rule_stack_top -= 1;
    const rule_stack_t *top = &parser->rule_stack[parser->rule_stack_top];
    *rule = rules[top->rule_id];
    *arg_i = top->arg_i;
    *src_line = top->src_line;
}

168
// Build a leaf parse node by packing kind and arg into a single word.
// For MP_PARSE_NODE_SMALL_INT the argument is stored shifted left by 1 bit,
// for every other kind it is stored shifted left by 5 bits; the kind occupies
// the bits below the argument.
mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) {
    // Shift in the unsigned type: left-shifting a negative signed value is
    // undefined behaviour in C, and arg is a signed integer.
    mp_uint_t payload = (mp_uint_t)arg;
    if (kind == MP_PARSE_NODE_SMALL_INT) {
        return (mp_parse_node_t)((mp_uint_t)kind | (payload << 1));
    }
    return (mp_parse_node_t)((mp_uint_t)kind | (payload << 5));
}

175
// Recursively free a parse node.  Only struct nodes own memory; leaf nodes
// are ignored.
void mp_parse_node_free(mp_parse_node_t pn) {
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        return;
    }

    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
    mp_uint_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
    mp_uint_t kind = MP_PARSE_NODE_STRUCT_KIND(pns);

    if (kind == RULE_string || kind == RULE_bytes) {
        // nodes[0] is a pointer to the character data and nodes[1] its length
        m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
    } else {
        // Rules flagged with RULE_ACT_ADD_BLANK carry an extra trailing node
        // that is not a child parse node, so it is excluded from the recursion.
        mp_uint_t num_children = ADD_BLANK_NODE(rules[kind]) ? num_nodes - 1 : num_nodes;
        for (mp_uint_t child = 0; child < num_children; child++) {
            mp_parse_node_free(pns->nodes[child]);
        }
    }

    // the struct itself is released with its full node count
    m_del_var(mp_parse_node_struct_t, mp_parse_node_t, num_nodes, pns);
}

198
#if MICROPY_DEBUG_PRINTERS
199
// Debug helper: print a parse node, and recursively its children, to stdout.
// Every line starts with the source line of the node (struct nodes only)
// followed by `indent` spaces; children are indented by 2 more spaces.
void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent) {
    // source-line column; non-struct nodes have no line and get spaces instead
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("       ");
    } else {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    }
    for (mp_uint_t remaining = indent; remaining > 0; remaining--) {
        printf(" ");
    }

    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
        return;
    }

    if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", value);
        return;
    }

    if (MP_PARSE_NODE_IS_LEAF(pn)) {
        // for all kinds except TOKEN the argument is printed through qstr_str()
        mp_uint_t leaf_arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(leaf_arg)); break;
            case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(leaf_arg)); break;
            case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(leaf_arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(leaf_arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(leaf_arg)); break;
            case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", leaf_arg); break;
            default: assert(0);
        }
        return;
    }

    // anything else must be a mp_parse_node_struct_t
    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
    mp_uint_t kind = MP_PARSE_NODE_STRUCT_KIND(pns);

    if (kind == RULE_string) {
        printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        return;
    }
    if (kind == RULE_bytes) {
        printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        return;
    }

    // ordinary rule node: header line, then each child
    mp_uint_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
    printf("%s(" UINT_FMT ") (n=" UINT_FMT ")\n", rules[kind]->rule_name, kind, num_nodes);
#else
    printf("rule(" UINT_FMT ") (n=" UINT_FMT ")\n", kind, num_nodes);
#endif
    for (mp_uint_t child = 0; child < num_nodes; child++) {
        mp_parse_node_print(pns->nodes[child], indent + 2);
    }
}
244
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
245
246

/*
247
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
248
    printf("result stack, most recent first\n");
249
    for (mp_int_t i = parser->result_stack_top - 1; i >= 0; i--) {
250
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
251
252
253
254
    }
}
*/

255
// Remove and return the top node of the result stack.
// Returns MP_PARSE_NODE_NULL, leaving the stack untouched, once a memory
// error has been recorded.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }

    assert(parser->result_stack_top > 0);
    parser->result_stack_top -= 1;
    return parser->result_stack[parser->result_stack_top];
}

263
// Return, without removing it, the node `pos` entries below the top of the
// result stack (pos == 0 is the top itself).
// Returns MP_PARSE_NODE_NULL once a memory error has been recorded.
STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }

    assert(parser->result_stack_top > pos);
    mp_uint_t index = parser->result_stack_top - 1 - pos;
    return parser->result_stack[index];
}

271
// Push a parse node onto the result stack, growing the stack when it is full.
// Does nothing if a memory error has already been recorded; records a memory
// error (and pushes nothing) if the stack cannot be grown.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }

    // stack full: grow it by a fixed increment
    if (parser->result_stack_top >= parser->result_stack_alloc) {
        mp_uint_t new_alloc = parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC;
        mp_parse_node_t *grown = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, new_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack = grown;
        parser->result_stack_alloc = new_alloc;
    }

    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}

287
// Push a RULE_string / RULE_bytes struct node holding a private copy of the
// given data.  The node has two entries: nodes[0] is the pointer to the copy
// and nodes[1] is its length (this is the layout mp_parse_node_free releases).
//
// Allocation failures are reported through memory_error(), like every other
// allocation made by the parser, instead of going through m_new.
// NOTE(review): m_new presumably raises on failure rather than returning
// NULL - confirm against py/misc.h.
STATIC void push_result_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) {
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (pn == NULL) {
        memory_error(parser);
        return;
    }

    // copy the data; a NULL result is only an error for a non-empty request
    char *p = m_new_maybe(char, len);
    if (p == NULL && len != 0) {
        // don't leave the half-built node behind
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
        memory_error(parser);
        return;
    }
    if (len != 0) {
        memcpy(p, str, len);
    }

    pn->source_line = src_line;
    pn->kind_num_nodes = rule_kind | (2 << 8);
    pn->nodes[0] = (mp_int_t)p;
    pn->nodes[1] = len;
    push_result_node(parser, (mp_parse_node_t)pn);
}
301

302
// Convert the lexer's current token into a parse node and push it onto the
// result stack:
//   NAME          -> ID leaf holding the interned name
//   NUMBER        -> SMALL_INT leaf if the literal is a plain integer that
//                    fits, DECIMAL leaf if it contains '.', 'e' or 'j',
//                    otherwise INTEGER leaf holding the interned literal text
//   STRING/BYTES  -> STRING/BYTES leaf if the data is (or gets) interned,
//                    otherwise a RULE_string/RULE_bytes struct node
//   anything else -> TOKEN leaf holding the token kind
STATIC void push_result_token(parser_t *parser) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t pn;

    if (lex->tok_kind == MP_TOKEN_NAME) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
    } else if (lex->tok_kind == MP_TOKEN_NUMBER) {
        const char *text = lex->vstr.buf;
        mp_uint_t text_len = lex->vstr.len;

        // mp_parse_num_base fills in the base and returns the index to start
        // scanning digits from
        mp_uint_t base = 0;
        mp_uint_t pos = mp_parse_num_base(text, text_len, &base);

        mp_int_t value = 0;
        bool is_decimal = false;
        bool all_digits = true;
        bool overflowed = false;

        while (pos < text_len) {
            char ch = text[pos];
            int lower = ch | 0x20;
            mp_uint_t digit;
            if (unichar_isdigit(ch) && (mp_uint_t)(ch - '0') < base) {
                digit = ch - '0';
            } else if (base == 16 && 'a' <= lower && lower <= 'f') {
                digit = lower - 'a' + 10;
            } else if (ch == '.' || lower == 'e' || lower == 'j') {
                // fraction, exponent or imaginary suffix: not an integer literal
                is_decimal = true;
                break;
            } else {
                // some other character: leave the literal to be parsed later
                all_digits = false;
                break;
            }

            // add next digit and check for overflow
            if (mp_small_int_mul_overflow(value, base)) {
                overflowed = true;
            }
            value = value * base + digit;
            if (!MP_SMALL_INT_FITS(value)) {
                overflowed = true;
            }

            pos++;
        }

        if (is_decimal) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(text, text_len));
        } else if (all_digits && !overflowed && MP_SMALL_INT_FITS(value)) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, value);
        } else {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(text, text_len));
        }
    } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) {
        // Don't automatically intern all strings/bytes.  Doc strings (which are
        // usually large) will be discarded by the compiler, and so we shouldn't
        // intern them.
        bool is_str = lex->tok_kind == MP_TOKEN_STRING;
        qstr qst;
        if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
            // short data is always interned
            qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
        } else {
            // long data is only looked up, never added
            qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
        }

        if (qst == MP_QSTR_NULL) {
            // not interned, make a node holding a pointer to the string/bytes data
            push_result_string_bytes(parser, lex->tok_line, is_str ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
            return;
        }

        // qstr exists, make a leaf node
        pn = mp_parse_node_new_leaf(is_str ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
    } else {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
    }

    push_result_node(parser, pn);
}

371
// Replace the top num_args nodes of the result stack by a single struct node
// for the given rule.  The popped nodes become the children of the new node,
// keeping the order in which they were pushed.
// Records a memory error, leaving the result stack untouched, if the node
// cannot be allocated.
STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
    if (pn == NULL) {
        memory_error(parser);
        return;
    }

    pn->source_line = src_line;
    // rule id in the low 8 bits, number of nodes above them
    pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);

    // the stack yields the last child first, so fill the array back to front
    mp_uint_t slot = num_args;
    while (slot > 0) {
        slot -= 1;
        pn->nodes[slot] = pop_result(parser);
    }

    push_result_node(parser, (mp_parse_node_t)pn);
}

385
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_parse_error_kind_t *parse_error_kind_out) {
386

387
    // initialise parser and allocate memory for its stacks
388

389
    parser_t parser;
390

391
    parser.had_memory_error = false;
392

393
    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
394
395
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien's avatar
Damien committed
396

397
    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
398
399
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien's avatar
Damien committed
400

401
402
403
404
405
406
    parser.lexer = lex;

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        goto memory_error;
    }
407

408
    // work out the top-level rule to use, and push it on the stack
409
    mp_uint_t top_level_rule;
Damien's avatar
Damien committed
410
    switch (input_kind) {
411
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien George's avatar
Damien George committed
412
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien's avatar
Damien committed
413
414
        default: top_level_rule = RULE_file_input;
    }
415
    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien's avatar
Damien committed
416

417
418
    // parse!

419
420
    mp_uint_t n, i; // state for the current rule
    mp_uint_t rule_src_line; // source line for the first token matched by the current rule
Damien's avatar
Damien committed
421
    bool backtrack = false;
422
    const rule_t *rule = NULL;
Damien's avatar
Damien committed
423
424
425

    for (;;) {
        next_rule:
426
        if (parser.rule_stack_top == 0 || parser.had_memory_error) {
Damien's avatar
Damien committed
427
428
429
            break;
        }

430
        pop_rule(&parser, &rule, &i, &rule_src_line);
Damien's avatar
Damien committed
431
432
433
434
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
435
436
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien's avatar
Damien committed
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                for (; i < n - 1; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
452
453
                            if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                                push_result_token(&parser);
454
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
455
456
457
458
                                goto next_rule;
                            }
                            break;
                        case RULE_ARG_RULE:
459
                        rule_or_no_other_choice:
460
461
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien's avatar
Damien committed
462
463
464
                            goto next_rule;
                        default:
                            assert(0);
465
                            goto rule_or_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
466
467
468
                    }
                }
                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
469
470
                    if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                        push_result_token(&parser);
471
                        mp_lexer_to_next(lex);
Damien's avatar
Damien committed
472
473
474
475
476
                    } else {
                        backtrack = true;
                        goto next_rule;
                    }
                } else {
477
                    push_rule_from_arg(&parser, rule->arg[i]);
Damien's avatar
Damien committed
478
479
480
                }
                break;

481
            case RULE_ACT_AND: {
Damien's avatar
Damien committed
482
483
484
485
486
487

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
488
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
504
                        case RULE_ARG_TOK: {
Damien's avatar
Damien committed
505
                            // need to match a token
506
                            mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
507
                            if (lex->tok_kind == tok_kind) {
Damien's avatar
Damien committed
508
                                // matched token
509
                                if (tok_kind == MP_TOKEN_NAME) {
510
                                    push_result_token(&parser);
Damien's avatar
Damien committed
511
                                }
512
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
513
514
515
516
517
518
519
520
521
522
523
524
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
525
                        }
Damien's avatar
Damien committed
526
527
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
528
                        rule_and_no_other_choice:
529
530
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien's avatar
Damien committed
531
532
533
                            goto next_rule;
                        default:
                            assert(0);
534
                            goto rule_and_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
535
536
537
538
539
540
541
542
543
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
544
                bool emit_rule = false;
545
                for (mp_uint_t x = 0; x < n; ++x) {
Damien's avatar
Damien committed
546
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
547
                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
548
                        if (tok_kind >= MP_TOKEN_NAME) {
Damien's avatar
Damien committed
549
550
                            emit_rule = true;
                        }
551
                        if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
552
553
554
555
556
557
558
559
560
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

561
562
#if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
563
564
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
565
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
566
567
                        pop_result(&parser); // MP_PARSE_NODE_NULL
                        mp_parse_node_free(pop_result(&parser)); // RULE_string
568
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
569
570
571
572
573
                        break;
                    }
                }
#endif

Damien's avatar
Damien committed
574
575
576
577
578
                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

579
580
581
582
583
584
                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
                // rule should not be emitted if it has only 1 argument
                // NOTE: can't set this flag for atom_paren because we need it
                // to distinguish, for example, [a,b] from [(a,b)]
                // TODO possibly set for: varargslist_name, varargslist_equal
                if (rule->act & RULE_ACT_ALLOW_IDENT) {
Damien's avatar
Damien committed
585
586
587
588
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
589
                if (ADD_BLANK_NODE(rule)) {
Damien's avatar
Damien committed
590
                    emit_rule = true;
591
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
592
593
594
                    i += 1;
                }

595
596
                mp_uint_t num_not_nil = 0;
                for (mp_uint_t x = 0; x < i; ++x) {
597
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
598
599
600
601
602
                        num_not_nil += 1;
                    }
                }
                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
                if (emit_rule) {
603
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
604
                } else if (num_not_nil == 0) {
605
                    push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien's avatar
Damien committed
606
607
608
609
                    //result_stack_show(parser);
                    //assert(0);
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
610
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
611
                    for (mp_uint_t x = 0; x < i; ++x) {
612
                        mp_parse_node_t pn2 = pop_result(&parser);
613
                        if (pn2 != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
614
615
616
                            pn = pn2;
                        }
                    }
617
                    push_result_node(&parser, pn);
Damien's avatar
Damien committed
618
                } else {
619
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
620
621
                }
                break;
622
            }
Damien's avatar
Damien committed
623

624
            case RULE_ACT_LIST: {
Damien's avatar
Damien committed
625
626
627
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
628
                bool had_trailing_sep;
Damien's avatar
Damien committed
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow a trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
661
                        mp_uint_t arg = rule->arg[i & 1 & n];
Damien's avatar
Damien committed
662
663
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
664
                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
665
666
667
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
668
                                        push_result_token(&parser);
Damien's avatar
Damien committed
669
                                    }
670
                                    mp_lexer_to_next(lex);
Damien's avatar
Damien committed
671
672
673
674
675
676
677
678
679
680
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
681
                            rule_list_no_other_choice:
682
683
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
Damien's avatar
Damien committed
684
685
686
                                goto next_rule;
                            default:
                                assert(0);
687
                                goto rule_list_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
704
                        push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
705
706
707
708
709
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
710
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
711
712
                }
                break;
713
            }
Damien's avatar
Damien committed
714
715
716
717
718

            default:
                assert(0);
        }
    }
719

720
721
722
    mp_parse_node_t result;

    // check if we had a memory error
723
724
    if (parser.had_memory_error) {
memory_error:
725
726
727
728
729
730
        *parse_error_kind_out = MP_PARSE_ERROR_MEMORY;
        result = MP_PARSE_NODE_NULL;
        goto finished;

    }

731
    // check we are at the end of the token stream
732
    if (lex->tok_kind != MP_TOKEN_END) {
733
        goto syntax_error;
Damien's avatar
Damien committed
734
    }
735

Damien's avatar
Damien committed
736
737
    //printf("--------------\n");
    //result_stack_show(parser);
738
739
    //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser.result_stack_alloc);
Damien's avatar
Damien committed
740
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
741
742

    // get the root parse node that we created
743
744
    assert(parser.result_stack_top == 1);
    result = parser.result_stack[0];
745
746
747

finished:
    // free the memory that we don't need anymore
748
749
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
750
751
752

    // return the result
    return result;
Damien's avatar
Damien committed
753
754

syntax_error:
755
    if (lex->tok_kind == MP_TOKEN_INDENT) {
756
        *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
757
    } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
758
        *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
759
    } else {
760
        *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
Damien's avatar
Damien committed
761
#ifdef USE_RULE_NAME
762
        // debugging: print the rule name that failed and the token
763
764
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
765
        mp_token_show(lex);
766
#endif
767
#endif
768
    }
769
770
    result = MP_PARSE_NODE_NULL;
    goto finished;
Damien's avatar
Damien committed
771
}