parse.c 29.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <string.h>
Damien's avatar
Damien committed
32

33
#include "mpconfig.h"
34
#include "misc.h"
35
#include "qstr.h"
Damien's avatar
Damien committed
36
#include "lexer.h"
37
#include "parsenumbase.h"
Damien's avatar
Damien committed
38
#include "parse.h"
39
#include "smallint.h"
Damien's avatar
Damien committed
40
41

// Layout of rule_t.act:
//   bits 0-3  argument count (or the list variant for RULE_ACT_LIST rules)
//   bits 4-5  kind of rule (or / and / list)
//   bits 6-7  optional flags
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_KIND_MASK      (0x30)
#define RULE_ACT_ALLOW_IDENT    (0x40)
#define RULE_ACT_ADD_BLANK      (0x80)

// Values of the kind field selected by RULE_ACT_KIND_MASK.
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Layout of each rule_t.arg[] entry: the top 4 bits say what kind of argument
// it is, the low 12 bits carry the token kind or the rule id.
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

// Evaluates to true when the given rule (a pointer to rule_t) has the
// RULE_ACT_ADD_BLANK flag set.  The argument is parenthesised so that any
// pointer expression, not just a plain identifier, can be passed.
#define ADD_BLANK_NODE(rule) ((((rule)->act) & RULE_ACT_ADD_BLANK) != 0)
57

Damien's avatar
Damien committed
58
59
60
61
62
63
64
65
66
67
68
69
70
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of one grammar rule.  Instances are generated from
// grammar.h via DEF_RULE using positional initialisers, so the field order
// here must not change.
typedef struct _rule_t {
    byte rule_id; // the RULE_xxx enum value of this rule
    byte act; // rule kind, flags and argument count; decode with the RULE_ACT_xxx masks
#ifdef USE_RULE_NAME
    const char *rule_name; // stringified rule name, only present for debugging
#endif
    uint16_t arg[]; // the rule's arguments; decode each with the RULE_ARG_xxx masks
} rule_t;

// Numeric ids of the grammar rules.  grammar.h is expanded with DEF_RULE
// producing one RULE_xxx enumerator per rule, in the order the rules are
// listed there.
enum {
#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
#include "grammar.h"
#undef DEF_RULE

    // one past the last rule generated from grammar.h
    RULE_maximum_number_of,

    // special node for non-interned string
    RULE_string,
};

78
79
// Shorthand vocabulary used by grammar.h to spell out each rule.  These names
// are deliberately short and collide with ordinary identifiers (eg "rule",
// "list"), so every one of them is #undef'd again as soon as grammar.h has
// been expanded.

// flags that may be or'd into the rule kind
#define ident                   (RULE_ACT_ALLOW_IDENT)
#define blank                   (RULE_ACT_ADD_BLANK)

// rule kinds; n is the number of arguments of the rule
#define or(n)                   (RULE_ACT_OR | (n))
#define and(n)                  (RULE_ACT_AND | (n))
#define one_or_more             (RULE_ACT_LIST | 2)
#define list                    (RULE_ACT_LIST | 1)
#define list_with_end           (RULE_ACT_LIST | 3)

// rule arguments: a token or a sub-rule, either mandatory or optional
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_tok(t)              (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)

// Emit one "static const rule_t rule_xxx" object per rule in grammar.h.
#ifdef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#endif
#include "grammar.h"

// Remove the whole shorthand vocabulary again, including the ident/blank
// flags, so that none of these names can silently rewrite later code.
#undef ident
#undef blank
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_tok
#undef opt_rule
#undef one_or_more
#undef DEF_RULE

106
// Table of all grammar rules, indexed by RULE_xxx id (the entries are emitted
// in the same grammar.h order as the enum).  Both the rules and the table of
// pointers are constant, so the table itself never needs to be writable.
STATIC const rule_t *const rules[] = {
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
#include "grammar.h"
#undef DEF_RULE
};

typedef struct _rule_stack_t {
113
114
115
    mp_uint_t src_line : 24;
    mp_uint_t rule_id : 8;
    mp_uint_t arg_i : 32; // what should the bit-size be?
Damien's avatar
Damien committed
116
117
118
} rule_stack_t;

typedef struct _parser_t {
119
120
    bool had_memory_error;

121
122
    mp_uint_t rule_stack_alloc;
    mp_uint_t rule_stack_top;
Damien's avatar
Damien committed
123
124
    rule_stack_t *rule_stack;

125
126
    mp_uint_t result_stack_alloc;
    mp_uint_t result_stack_top;
127
    mp_parse_node_t *result_stack;
128
129

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
130
131
} parser_t;

132
133
134
135
// Record that an allocation failed.  The flag is sticky: the push/pop helpers
// check it and do nothing once it is set.
STATIC inline void memory_error(parser_t *parser) {
    parser->had_memory_error = true;
}

136
// Save a rule on the rule stack so that it is (re)started at argument arg_i
// when popped.  The stack grows by MICROPY_ALLOC_PARSE_RULE_INC entries when
// full; if that fails the parser's memory-error flag is set and nothing is
// pushed.  Does nothing if a memory error has already been recorded.
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
    if (parser->had_memory_error) {
        return;
    }

    // make room for one more entry if the stack is full
    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        mp_uint_t new_alloc = parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC;
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, new_alloc);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc = new_alloc;
    }

    // fill in the new top-of-stack entry
    rule_stack_t *slot = &parser->rule_stack[parser->rule_stack_top];
    parser->rule_stack_top += 1;
    slot->src_line = src_line;
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;
}

155
// Push the rule referred to by a rule argument (which must be of kind
// RULE_ARG_RULE or RULE_ARG_OPT_RULE), starting at its first argument and
// tagged with the line of the lexer's current token.
STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);

    // low bits of the argument hold the id of the rule to push
    mp_uint_t child_id = arg & RULE_ARG_ARG_MASK;
    assert(child_id < RULE_maximum_number_of);

    const rule_t *child = rules[child_id];
    push_rule(parser, parser->lexer->tok_line, child, 0);
}

162
// Remove the top entry of the rule stack and return its contents through the
// out-parameters: *rule is the rule to resume, *arg_i the argument index to
// resume at, *src_line the source line stored when it was pushed.
// Must not be called after a memory error, nor on an empty stack.
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
    assert(!parser->had_memory_error);
    // guard against underflow, mirroring the check done by pop_result
    assert(parser->rule_stack_top > 0);

    parser->rule_stack_top -= 1;
    const rule_stack_t *entry = &parser->rule_stack[parser->rule_stack_top];
    *rule = rules[entry->rule_id];
    *arg_i = entry->arg_i;
    *src_line = entry->src_line;
}

170
// Build a leaf parse node by packing arg above the kind tag.
//   kind: one of the MP_PARSE_NODE_xxx leaf kinds
//   arg:  payload; shifted left by 1 for MP_PARSE_NODE_SMALL_INT and by 5 for
//         every other kind, then or'd with kind
// Returns the packed node value.
mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) {
    // Shift in the unsigned domain: left-shifting a signed value that is
    // negative, or whose result does not fit, is undefined behaviour in C.
    mp_uint_t payload = (mp_uint_t)arg;
    if (kind == MP_PARSE_NODE_SMALL_INT) {
        return (mp_parse_node_t)((mp_uint_t)kind | (payload << 1));
    }
    return (mp_parse_node_t)((mp_uint_t)kind | (payload << 5));
}

177
// Free a parse node.  Leaf and null nodes own no memory, so only struct
// nodes do any work:
//  - a RULE_string node frees the character buffer held in nodes[0] (its
//    length is nodes[1]);
//  - any other struct frees its children recursively, except that for rules
//    with the "add blank" flag the last slot is skipped;
// and finally the struct itself is released.
void mp_parse_node_free(mp_parse_node_t pn) {
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        return;
    }

    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
    mp_uint_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
    mp_uint_t kind = MP_PARSE_NODE_STRUCT_KIND(pns);

    if (kind == RULE_string) {
        m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
    } else {
        // the trailing blank slot (if the rule has one) is not a child node
        mp_uint_t num_children = num_nodes;
        if (ADD_BLANK_NODE(rules[kind])) {
            num_children--;
        }
        for (mp_uint_t child = 0; child < num_children; child++) {
            mp_parse_node_free(pns->nodes[child]);
        }
    }

    // the struct was allocated with room for all num_nodes slots
    m_del_var(mp_parse_node_struct_t, mp_parse_node_t, num_nodes, pns);
}

200
#if MICROPY_DEBUG_PRINTERS
// Debug helper: print the parse tree rooted at pn to stdout, one node per
// line.  Struct nodes are prefixed with their source line in brackets (other
// nodes get blank padding of the same width), then the line is indented by
// `indent` spaces.  Children of a rule node are printed 2 spaces deeper.
void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent) {
    // line-number column
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }

    // indentation
    for (mp_uint_t i = 0; i < indent; i++) {
        printf(" ");
    }

    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
    } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
    } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
        mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
            // arg is unsigned, so it must be printed with the unsigned format
            case MP_PARSE_NODE_TOKEN: printf("tok(" UINT_FMT ")\n", arg); break;
            default: assert(0); break;
        }
    } else {
        // node must be a mp_parse_node_struct_t
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
            // nodes[0] is the character data, nodes[1] its length
            printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        } else {
            mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
            printf("%s(" UINT_FMT ") (n=" UINT_FMT ")\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
            printf("rule(" UINT_FMT ") (n=" UINT_FMT ")\n", (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
            for (mp_uint_t i = 0; i < n; i++) {
                mp_parse_node_print(pns->nodes[i], indent + 2);
            }
        }
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
245
246

/*
247
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
248
    printf("result stack, most recent first\n");
249
    for (mp_int_t i = parser->result_stack_top - 1; i >= 0; i--) {
250
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
251
252
253
254
    }
}
*/

255
// Remove and return the top node of the result stack.  After a memory error
// the stack is left untouched and MP_PARSE_NODE_NULL is returned instead.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    mp_parse_node_t top = MP_PARSE_NODE_NULL;
    if (!parser->had_memory_error) {
        assert(parser->result_stack_top > 0);
        parser->result_stack_top -= 1;
        top = parser->result_stack[parser->result_stack_top];
    }
    return top;
}

263
// Return, without removing it, the node that is pos entries below the top of
// the result stack (pos 0 is the top itself).  After a memory error
// MP_PARSE_NODE_NULL is returned instead.
STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
    mp_parse_node_t found = MP_PARSE_NODE_NULL;
    if (!parser->had_memory_error) {
        assert(parser->result_stack_top > pos);
        mp_uint_t index = parser->result_stack_top - 1 - pos;
        found = parser->result_stack[index];
    }
    return found;
}

271
// Push a node onto the result stack.  The stack grows by
// MICROPY_ALLOC_PARSE_RESULT_INC entries when full; if that fails the
// parser's memory-error flag is set and the node is not pushed.  Does nothing
// if a memory error has already been recorded.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }
    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // note: named so that it does not shadow the pn parameter
        mp_parse_node_t *new_stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
        if (new_stack == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack = new_stack;
        parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
    }
    parser->result_stack[parser->result_stack_top++] = pn;
}

287
// Push a RULE_string node holding a private copy of a non-interned string.
//   src_line: source line to record in the node
//   str, len: the string data to copy (need not be null terminated)
// The node has two slots: nodes[0] is the pointer to the copied bytes and
// nodes[1] is the length.  If either allocation fails the parser's
// memory-error flag is set, nothing is pushed and nothing is leaked.
STATIC void push_result_string(parser_t *parser, mp_uint_t src_line, const char *str, mp_uint_t len) {
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (pn == NULL) {
        memory_error(parser);
        return;
    }

    // Use the non-raising allocator, like every other allocation made by the
    // parser, so that failure is reported through had_memory_error.
    char *p = m_new_maybe(char, len);
    if (p == NULL && len != 0) {
        // give back the node allocated above before reporting the failure
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
        memory_error(parser);
        return;
    }
    if (len != 0) {
        memcpy(p, str, len);
    }

    pn->source_line = src_line;
    pn->kind_num_nodes = RULE_string | (2 << 8);
    pn->nodes[0] = (mp_parse_node_t)p;
    pn->nodes[1] = len;
    push_result_node(parser, (mp_parse_node_t)pn);
}
301

302
// Turn the lexer's current token into a parse node and push it on the result
// stack:
//  - names become MP_PARSE_NODE_ID leaves;
//  - numbers become a small-int leaf when they are plain integers that fit,
//    a DECIMAL leaf when they contain '.', an exponent 'e' or a 'j' suffix,
//    and an INTEGER leaf (holding the text) otherwise;
//  - strings become a STRING leaf if short enough to intern or already
//    interned, else a RULE_string node with a copy of the data;
//  - bytes become a BYTES leaf;
//  - any other token becomes a TOKEN leaf holding the token kind.
STATIC void push_result_token(parser_t *parser) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t pn;

    switch (lex->tok_kind) {
        case MP_TOKEN_NAME:
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
            break;

        case MP_TOKEN_NUMBER: {
            const char *str = lex->vstr.buf;
            mp_uint_t len = lex->vstr.len;
            mp_uint_t base = 0;
            mp_uint_t pos = mp_parse_num_base(str, len, &base);

            mp_int_t value = 0;
            bool is_decimal = false;
            bool only_digits = true;
            bool overflowed = false;

            while (pos < len) {
                char ch = str[pos];
                int lower = ch | 0x20;
                mp_uint_t digit;
                if (unichar_isdigit(ch) && ch - '0' < base) {
                    digit = ch - '0';
                } else if (base == 16 && 'a' <= lower && lower <= 'f') {
                    digit = lower - 'a' + 10;
                } else if (ch == '.' || lower == 'e' || lower == 'j') {
                    // not an integer literal at all
                    is_decimal = true;
                    break;
                } else {
                    // some other character; can't be a small int
                    only_digits = false;
                    break;
                }

                // add next digit and check for overflow
                if (mp_small_int_mul_overflow(value, base)) {
                    overflowed = true;
                }
                value = value * base + digit;
                if (!MP_SMALL_INT_FITS(value)) {
                    overflowed = true;
                }

                pos++;
            }

            if (is_decimal) {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
            } else if (only_digits && !overflowed && MP_SMALL_INT_FITS(value)) {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, value);
            } else {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
            }
            break;
        }

        case MP_TOKEN_STRING: {
            // Strings are not all interned automatically: doc strings (which
            // are usually large) get discarded by the compiler, so interning
            // them would be wasted.
            qstr qst;
            if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
                // short string: intern it
                qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
            } else {
                // long string: only reuse an interned copy if one exists
                qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
            }
            if (qst == MP_QSTR_NULL) {
                // not interned, make a node holding a pointer to the string data
                push_result_string(parser, lex->tok_line, lex->vstr.buf, lex->vstr.len);
                return;
            }
            // qstr exists, make a leaf node
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
            break;
        }

        case MP_TOKEN_BYTES:
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
            break;

        default:
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
            break;
    }

    push_result_node(parser, pn);
}

373
// Replace the top num_args nodes of the result stack by a single struct node
// for the given rule.  The popped nodes become the struct's children, in the
// order they were originally pushed.  If the struct can't be allocated the
// parser's memory-error flag is set and the stack is left as it was.
STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
    if (node == NULL) {
        memory_error(parser);
        return;
    }

    node->source_line = src_line;
    // low 8 bits: rule id; remaining bits: number of child nodes
    node->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);

    // the top of the stack is the last child, so fill from the back
    mp_uint_t slot = num_args;
    while (slot > 0) {
        slot -= 1;
        node->nodes[slot] = pop_result(parser);
    }

    push_result_node(parser, (mp_parse_node_t)node);
}

387
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_parse_error_kind_t *parse_error_kind_out) {
388

389
    // initialise parser and allocate memory for its stacks
390

391
    parser_t parser;
392

393
    parser.had_memory_error = false;
394

395
    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
396
397
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien's avatar
Damien committed
398

399
    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
400
401
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien's avatar
Damien committed
402

403
404
405
406
407
408
    parser.lexer = lex;

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        goto memory_error;
    }
409

410
    // work out the top-level rule to use, and push it on the stack
411
    mp_uint_t top_level_rule;
Damien's avatar
Damien committed
412
    switch (input_kind) {
413
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien George's avatar
Damien George committed
414
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien's avatar
Damien committed
415
416
        default: top_level_rule = RULE_file_input;
    }
417
    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien's avatar
Damien committed
418

419
420
    // parse!

421
422
    mp_uint_t n, i; // state for the current rule
    mp_uint_t rule_src_line; // source line for the first token matched by the current rule
Damien's avatar
Damien committed
423
    bool backtrack = false;
424
    const rule_t *rule = NULL;
Damien's avatar
Damien committed
425
426
427

    for (;;) {
        next_rule:
428
        if (parser.rule_stack_top == 0 || parser.had_memory_error) {
Damien's avatar
Damien committed
429
430
431
            break;
        }

432
        pop_rule(&parser, &rule, &i, &rule_src_line);
Damien's avatar
Damien committed
433
434
435
436
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
437
438
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien's avatar
Damien committed
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    goto next_rule;
                } else {
                    backtrack = false;
                }
                for (; i < n - 1; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
                        case RULE_ARG_TOK:
454
455
                            if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                                push_result_token(&parser);
456
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
457
458
459
460
                                goto next_rule;
                            }
                            break;
                        case RULE_ARG_RULE:
461
462
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien's avatar
Damien committed
463
464
465
466
467
468
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }
                if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
469
470
                    if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                        push_result_token(&parser);
471
                        mp_lexer_to_next(lex);
Damien's avatar
Damien committed
472
473
474
475
476
                    } else {
                        backtrack = true;
                        goto next_rule;
                    }
                } else {
477
                    push_rule_from_arg(&parser, rule->arg[i]);
Damien's avatar
Damien committed
478
479
480
                }
                break;

481
            case RULE_ACT_AND: {
Damien's avatar
Damien committed
482
483
484
485
486
487

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
488
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
504
                        case RULE_ARG_TOK: {
Damien's avatar
Damien committed
505
                            // need to match a token
506
                            mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
507
                            if (lex->tok_kind == tok_kind) {
Damien's avatar
Damien committed
508
                                // matched token
509
                                if (tok_kind == MP_TOKEN_NAME) {
510
                                    push_result_token(&parser);
Damien's avatar
Damien committed
511
                                }
512
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
513
514
515
516
517
518
519
520
521
522
523
524
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
525
                                           }
Damien's avatar
Damien committed
526
527
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
528
529
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien's avatar
Damien committed
530
531
532
533
534
535
536
537
538
539
540
541
                            goto next_rule;
                        default:
                            assert(0);
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
542
                bool emit_rule = false;
543
                for (mp_uint_t x = 0; x < n; ++x) {
Damien's avatar
Damien committed
544
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
545
                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
546
                        if (tok_kind >= MP_TOKEN_NAME) {
Damien's avatar
Damien committed
547
548
                            emit_rule = true;
                        }
549
                        if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
550
551
552
553
554
555
556
557
558
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

559
560
#if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
561
562
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
563
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
564
565
                        pop_result(&parser); // MP_PARSE_NODE_NULL
                        mp_parse_node_free(pop_result(&parser)); // RULE_string
566
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
567
568
569
570
571
                        break;
                    }
                }
#endif

Damien's avatar
Damien committed
572
573
574
575
576
                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

577
578
579
580
581
582
                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
                // rule should not be emitted if it has only 1 argument
                // NOTE: can't set this flag for atom_paren because we need it
                // to distinguish, for example, [a,b] from [(a,b)]
                // TODO possibly set for: varargslist_name, varargslist_equal
                if (rule->act & RULE_ACT_ALLOW_IDENT) {
Damien's avatar
Damien committed
583
584
585
586
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
587
                if (ADD_BLANK_NODE(rule)) {
Damien's avatar
Damien committed
588
                    emit_rule = true;
589
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
590
591
592
                    i += 1;
                }

593
594
                mp_uint_t num_not_nil = 0;
                for (mp_uint_t x = 0; x < i; ++x) {
595
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
596
597
598
599
600
                        num_not_nil += 1;
                    }
                }
                //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
                if (emit_rule) {
601
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
602
                } else if (num_not_nil == 0) {
603
                    push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien's avatar
Damien committed
604
605
606
607
                    //result_stack_show(parser);
                    //assert(0);
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
608
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
609
                    for (mp_uint_t x = 0; x < i; ++x) {
610
                        mp_parse_node_t pn2 = pop_result(&parser);
611
                        if (pn2 != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
612
613
614
                            pn = pn2;
                        }
                    }
615
                    push_result_node(&parser, pn);
Damien's avatar
Damien committed
616
                } else {
617
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
618
619
                }
                break;
620
            }
Damien's avatar
Damien committed
621

622
            case RULE_ACT_LIST: {
Damien's avatar
Damien committed
623
624
625
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
626
                bool had_trailing_sep;
Damien's avatar
Damien committed
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow a trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
659
                        mp_uint_t arg = rule->arg[i & 1 & n];
Damien's avatar
Damien committed
660
661
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
662
                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
663
664
665
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
666
                                        push_result_token(&parser);
Damien's avatar
Damien committed
667
                                    }
668
                                    mp_lexer_to_next(lex);
Damien's avatar
Damien committed
669
670
671
672
673
674
675
676
677
678
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
679
680
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
Damien's avatar
Damien committed
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
                                goto next_rule;
                            default:
                                assert(0);
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
700
                        push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
701
702
703
704
705
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    //printf("done list %s %d %d\n", rule->rule_name, n, i);
706
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
707
708
                }
                break;
709
            }
Damien's avatar
Damien committed
710
711
712
713
714

            default:
                assert(0);
        }
    }
715

716
717
718
    mp_parse_node_t result;

    // check if we had a memory error
719
720
    if (parser.had_memory_error) {
memory_error:
721
722
723
724
725
726
        *parse_error_kind_out = MP_PARSE_ERROR_MEMORY;
        result = MP_PARSE_NODE_NULL;
        goto finished;

    }

727
    // check we are at the end of the token stream
728
    if (lex->tok_kind != MP_TOKEN_END) {
729
        goto syntax_error;
Damien's avatar
Damien committed
730
    }
731

Damien's avatar
Damien committed
732
733
    //printf("--------------\n");
    //result_stack_show(parser);
734
735
    //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser.result_stack_alloc);
Damien's avatar
Damien committed
736
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
737
738

    // get the root parse node that we created
739
740
    assert(parser.result_stack_top == 1);
    result = parser.result_stack[0];
741
742
743

finished:
    // free the memory that we don't need anymore
744
745
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
746
747
748

    // return the result
    return result;
Damien's avatar
Damien committed
749
750

syntax_error:
751
    if (lex->tok_kind == MP_TOKEN_INDENT) {
752
        *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
753
    } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
754
        *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
755
    } else {
756
        *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
Damien's avatar
Damien committed
757
#ifdef USE_RULE_NAME
758
        // debugging: print the rule name that failed and the token
759
760
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
761
        mp_token_show(lex);
762
#endif
763
#endif
764
    }
765
766
    result = MP_PARSE_NODE_NULL;
    goto finished;
Damien's avatar
Damien committed
767
}