parse.c 30.4 KB
Newer Older
1
2
3
4
5
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013-2015 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
Damien's avatar
Damien committed
28
29
30
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
31
#include <string.h>
Damien's avatar
Damien committed
32

33
#include "py/nlr.h"
34
35
36
37
#include "py/lexer.h"
#include "py/parse.h"
#include "py/parsenum.h"
#include "py/smallint.h"
Damien's avatar
Damien committed
38
39

// Layout of the rule_t.act byte:
//   bits 0-3: per-kind argument (for or/and rules, the number of entries in arg[])
//   bits 4-5: kind of rule (or / and / list)
//   bit 6:    RULE_ACT_ALLOW_IDENT
//   bit 7:    RULE_ACT_ADD_BLANK
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_KIND_MASK      (0x30)
#define RULE_ACT_ALLOW_IDENT    (0x40)
#define RULE_ACT_ADD_BLANK      (0x80)

// values of the (act & RULE_ACT_KIND_MASK) field
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Layout of each 16-bit entry of rule_t.arg[]:
//   top 4 bits:  kind of argument (token / rule / optional rule)
//   low 12 bits: the token kind or rule id
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_RULE       (0x3000)

// Evaluates to true if the given rule (a pointer to an object with an "act"
// member) has the RULE_ACT_ADD_BLANK bit set.  The argument is parenthesised
// so that any pointer-valued expression can be passed, not only a plain name.
#define ADD_BLANK_NODE(rule) ((((rule)->act) & RULE_ACT_ADD_BLANK) != 0)
54

Damien's avatar
Damien committed
55
56
57
58
59
60
61
62
63
64
65
66
67
// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)

// Static description of one grammar rule.
typedef struct _rule_t {
    // identifier of this rule; used in this file as an index into the rules[] table
    byte rule_id;
    // combination of RULE_ACT_* bits: rule kind, per-kind argument and flags
    byte act;
#ifdef USE_RULE_NAME
    // name of the rule as a string; only present when USE_RULE_NAME is defined
    const char *rule_name;
#endif
    // flexible array of arguments; each entry is a RULE_ARG_* kind in the
    // top bits combined with a token kind or rule id in the low bits
    uint16_t arg[];
} rule_t;

// Identifiers for all rules: one RULE_xxx constant per DEF_RULE entry of the
// grammar, followed by a few special node kinds that are not grammar rules.
enum {
    // expand every grammar entry to its RULE_xxx enumerator
    #define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
    #include "py/grammar.h"
    #undef DEF_RULE

    // number of rules defined by the grammar (ids below this index rules[])
    RULE_maximum_number_of,

    // special node for non-interned string
    RULE_string,

    // special node for non-interned bytes
    RULE_bytes,

    // special node for a constant, generic Python object
    RULE_const_object,
};

77
78
// Small DSL used by py/grammar.h to describe each rule.  These macros are only
// meaningful while the grammar is being expanded into rule_t objects below,
// and all of them are undefined again straight afterwards so that common
// words such as "list", "rule", "ident" and "blank" do not stay macro-defined
// for the rest of this file.

// flags that can be or'd into the kind of a rule
#define ident                   (RULE_ACT_ALLOW_IDENT)
#define blank                   (RULE_ACT_ADD_BLANK)

// kinds of rule; macro arguments are parenthesised so any expression is safe
#define or(n)                   (RULE_ACT_OR | (n))
#define and(n)                  (RULE_ACT_AND | (n))
#define one_or_more             (RULE_ACT_LIST | 2)
#define list                    (RULE_ACT_LIST | 1)
#define list_with_end           (RULE_ACT_LIST | 3)

// arguments of a rule: a token, a mandatory sub-rule or an optional sub-rule
#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)

// define one "static const rule_t rule_xxx" object per grammar entry
#ifdef USE_RULE_NAME
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
#else
#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
#endif
#include "py/grammar.h"

#undef ident
#undef blank
#undef or
#undef and
#undef list
#undef list_with_end
#undef tok
#undef rule
#undef opt_rule
#undef one_or_more
#undef DEF_RULE

103
// Table mapping a rule id (RULE_xxx) to its static rule_t description, in the
// order the rules appear in py/grammar.h.  Both the rule_t objects and the
// table of pointers are const: the table is only ever read in this file, and
// declaring the pointers const lets the toolchain keep it in read-only memory.
STATIC const rule_t *const rules[] = {
#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
#include "py/grammar.h"
#undef DEF_RULE
};

typedef struct _rule_stack_t {
110
111
112
    mp_uint_t src_line : BITS_PER_WORD - 8; // maximum bits storing source line number
    mp_uint_t rule_id : 8; // this must be large enough to fit largest rule number
    mp_uint_t arg_i; // this dictates the maximum nodes in a "list" of things
Damien's avatar
Damien committed
113
114
115
} rule_stack_t;

typedef struct _parser_t {
116
117
    bool had_memory_error;

118
119
    mp_uint_t rule_stack_alloc;
    mp_uint_t rule_stack_top;
Damien's avatar
Damien committed
120
121
    rule_stack_t *rule_stack;

122
123
    mp_uint_t result_stack_alloc;
    mp_uint_t result_stack_top;
124
    mp_parse_node_t *result_stack;
125
126

    mp_lexer_t *lexer;
Damien's avatar
Damien committed
127
128
} parser_t;

129
130
131
132
// Record that an allocation failed during parsing.  The push/pop helpers in
// this file check parser->had_memory_error and do nothing once it is set.
STATIC inline void memory_error(parser_t *parser) {
    parser->had_memory_error = true;
}

133
// Push (rule, arg_i, src_line) onto the rule stack, growing the stack by
// MICROPY_ALLOC_PARSE_RULE_INC entries when it is full.  Does nothing if a
// memory error was already recorded; records one if the stack can't grow.
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
    if (parser->had_memory_error) {
        return;
    }

    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        // stack is full: try to extend it
        mp_uint_t new_alloc = parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC;
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, new_alloc, true);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc = new_alloc;
    }

    // fill in the next free slot
    rule_stack_t *slot = &parser->rule_stack[parser->rule_stack_top];
    parser->rule_stack_top += 1;
    slot->src_line = src_line;
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;
}

152
// Push the rule referenced by a rule argument (which must be of kind
// RULE_ARG_RULE or RULE_ARG_OPT_RULE), starting at its first argument and
// using the lexer's current token line as its source line.
STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
    assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);

    const mp_uint_t child_id = arg & RULE_ARG_ARG_MASK;
    assert(child_id < RULE_maximum_number_of);

    const rule_t *child = rules[child_id];
    push_rule(parser, parser->lexer->tok_line, child, 0);
}

159
// Pop the top entry of the rule stack, returning its rule, argument index
// and source line through the three output pointers.  Must not be called
// after a memory error has been recorded.
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
    assert(!parser->had_memory_error);

    const rule_stack_t *top = &parser->rule_stack[--parser->rule_stack_top];
    *rule = rules[top->rule_id];
    *arg_i = top->arg_i;
    *src_line = top->src_line;
}

167
// Build a leaf parse node from a kind tag and an argument.
// Small-int leaves store the argument shifted left by 1 bit, all other leaf
// kinds store it shifted left by 4 bits; the kind is or'd into the low bits.
mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) {
    // Shift as an unsigned value: arg may be negative (eg a negative small
    // int) and left-shifting a negative signed integer is undefined in C.
    if (kind == MP_PARSE_NODE_SMALL_INT) {
        return (mp_parse_node_t)(kind | ((mp_uint_t)arg << 1));
    }
    return (mp_parse_node_t)(kind | ((mp_uint_t)arg << 4));
}

174
// Recursively free a parse node.  Only struct nodes own memory; any other
// kind of node is ignored.
void mp_parse_node_free(mp_parse_node_t pn) {
    if (!MP_PARSE_NODE_IS_STRUCT(pn)) {
        return;
    }

    mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
    mp_uint_t num_nodes = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
    mp_uint_t kind = MP_PARSE_NODE_STRUCT_KIND(pns);

    if (kind == RULE_string || kind == RULE_bytes) {
        // nodes[0] is the character data and nodes[1] its length
        m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
    } else if (kind != RULE_const_object) {
        // (a const object is not freed since it's probably used by the compiled code)
        // free the children, skipping the trailing blank node if this rule has one
        mp_uint_t num_children = ADD_BLANK_NODE(rules[kind]) ? num_nodes - 1 : num_nodes;
        for (mp_uint_t idx = 0; idx < num_children; idx++) {
            mp_parse_node_free(pns->nodes[idx]);
        }
    }

    // finally release the struct itself
    m_del_var(mp_parse_node_struct_t, mp_parse_node_t, num_nodes, pns);
}

199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
// View *pn as a list of nodes of kind pn_kind.
//  - a null node gives an empty list (*nodes = NULL, returns 0)
//  - a struct node of kind pn_kind gives its children
//  - anything else is treated as a list holding just that one node
// Returns the number of nodes; *nodes is set to point at the first of them.
int mp_parse_node_extract_list(mp_parse_node_t *pn, mp_uint_t pn_kind, mp_parse_node_t **nodes) {
    if (MP_PARSE_NODE_IS_NULL(*pn)) {
        *nodes = NULL;
        return 0;
    }

    if (!MP_PARSE_NODE_IS_LEAF(*pn)) {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn);
        if (MP_PARSE_NODE_STRUCT_KIND(pns) == pn_kind) {
            *nodes = pns->nodes;
            return MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        }
    }

    // a leaf, or a struct of some other kind: a single-element list
    *nodes = pn;
    return 1;
}

218
#if MICROPY_DEBUG_PRINTERS
219
// Debugging helper: print the parse node pn, and recursively its children,
// to stdout.  Each line starts with the source line of the node (struct
// nodes only) followed by "indent" spaces; children are indented 2 further.
void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent) {
    // line-number column (blank for nodes that don't carry a source line)
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }
    for (mp_uint_t i = 0; i < indent; i++) {
        printf(" ");
    }
    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
    } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
    } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
        mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
            // arg is unsigned, so it must be printed with the unsigned format
            case MP_PARSE_NODE_TOKEN: printf("tok(" UINT_FMT ")\n", arg); break;
            default: assert(0);
        }
    } else {
        // node must be a mp_parse_node_struct_t
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
            // nodes[0] is the character data and nodes[1] its length
            printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
            printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
            printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
        } else {
            // a normal rule node: print its header, then all of its children
            mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
            printf("%s(" UINT_FMT ") (n=" UINT_FMT ")\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
            printf("rule(" UINT_FMT ") (n=" UINT_FMT ")\n", (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
            for (mp_uint_t i = 0; i < n; i++) {
                mp_parse_node_print(pns->nodes[i], indent + 2);
            }
        }
    }
}
264
#endif // MICROPY_DEBUG_PRINTERS
Damien's avatar
Damien committed
265
266

/*
267
STATIC void result_stack_show(parser_t *parser) {
Damien's avatar
Damien committed
268
    printf("result stack, most recent first\n");
269
    for (mp_int_t i = parser->result_stack_top - 1; i >= 0; i--) {
270
        mp_parse_node_print(parser->result_stack[i], 0);
Damien's avatar
Damien committed
271
272
273
274
    }
}
*/

275
// Remove and return the node on top of the result stack.
// Returns MP_PARSE_NODE_NULL without touching the stack after a memory error.
STATIC mp_parse_node_t pop_result(parser_t *parser) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }

    assert(parser->result_stack_top > 0);
    parser->result_stack_top -= 1;
    return parser->result_stack[parser->result_stack_top];
}

283
// Return, without removing it, the node that is pos entries below the top
// of the result stack (pos == 0 is the top itself).
// Returns MP_PARSE_NODE_NULL after a memory error.
STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
    if (parser->had_memory_error) {
        return MP_PARSE_NODE_NULL;
    }

    assert(parser->result_stack_top > pos);
    mp_uint_t index = parser->result_stack_top - 1 - pos;
    return parser->result_stack[index];
}

291
// Push pn onto the result stack, growing the stack by
// MICROPY_ALLOC_PARSE_RESULT_INC entries when it is full.  Does nothing if a
// memory error was already recorded; records one if the stack can't grow.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }

    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // stack is full: try to extend it
        mp_uint_t new_alloc = parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC;
        mp_parse_node_t *grown = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, new_alloc, true);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack = grown;
        parser->result_stack_alloc = new_alloc;
    }

    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}

307
// Create a 2-element struct node of kind rule_kind (RULE_string or
// RULE_bytes as used by the caller in this file) that holds a private copy
// of the len characters at str: nodes[0] is the pointer to the copy and
// nodes[1] is its length.
// On allocation failure a memory error is recorded in the parser and
// MP_PARSE_NODE_NULL is returned, with nothing left allocated.
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) {
    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (pn == NULL) {
        memory_error(parser);
        return MP_PARSE_NODE_NULL;
    }

    // Allocate the copy with the "maybe" allocator so that a failure here is
    // reported through the parser's memory-error flag, the same way as every
    // other allocation made while parsing, and release the node on failure.
    char *p = m_new_maybe(char, len);
    if (p == NULL) {
        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, 2, pn);
        memory_error(parser);
        return MP_PARSE_NODE_NULL;
    }
    memcpy(p, str, len);

    pn->source_line = src_line;
    pn->kind_num_nodes = rule_kind | (2 << 8);
    pn->nodes[0] = (mp_int_t)p;
    pn->nodes[1] = len;
    return (mp_parse_node_t)pn;
}

// Create a 1-element struct node of kind RULE_const_object whose only entry
// is the given object.  On allocation failure a memory error is recorded in
// the parser and MP_PARSE_NODE_NULL is returned.
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 1);
    if (node != NULL) {
        node->source_line = src_line;
        node->kind_num_nodes = RULE_const_object | (1 << 8);
        node->nodes[0] = (mp_uint_t)obj;
        return (mp_parse_node_t)node;
    }

    memory_error(parser);
    return MP_PARSE_NODE_NULL;
}
333

334
// Convert the lexer's current token into a parse node and push it onto the
// result stack.  The lexer is not advanced.
STATIC void push_result_token(parser_t *parser) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t pn;

    switch (lex->tok_kind) {
        case MP_TOKEN_NAME:
            // identifiers are always interned
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
            break;

        case MP_TOKEN_INTEGER: {
            // small ints fit in a leaf, anything else becomes a constant object
            mp_obj_t value = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
            if (MP_OBJ_IS_SMALL_INT(value)) {
                pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(value));
            } else {
                pn = make_node_const_object(parser, lex->tok_line, value);
            }
            break;
        }

        case MP_TOKEN_FLOAT_OR_IMAG: {
            mp_obj_t value = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
            pn = make_node_const_object(parser, lex->tok_line, value);
            break;
        }

        case MP_TOKEN_STRING:
        case MP_TOKEN_BYTES: {
            // Don't automatically intern all strings/bytes.  Doc strings (which
            // are usually large) will be discarded by the compiler, and so we
            // shouldn't intern them.
            bool is_string = lex->tok_kind == MP_TOKEN_STRING;
            qstr qst;
            if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
                // short strings are interned
                qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
            } else {
                // long strings are only looked up, to see if already interned
                qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
            }
            if (qst == MP_QSTR_NULL) {
                // not interned, make a node holding a pointer to the string/bytes data
                pn = make_node_string_bytes(parser, lex->tok_line, is_string ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
            } else {
                // qstr exists, make a leaf node
                pn = mp_parse_node_new_leaf(is_string ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
            }
            break;
        }

        default:
            // every other token is stored as a leaf holding its token kind
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
            break;
    }

    push_result_node(parser, pn);
}

373
// Replace the top num_args nodes of the result stack by a single struct
// node for the given rule which has those nodes as its children (in the
// order they were pushed).  On allocation failure a memory error is
// recorded and the result stack is left unchanged.
STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
    if (node == NULL) {
        memory_error(parser);
        return;
    }

    node->source_line = src_line;
    node->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);

    // the most recently pushed result is the last child, so fill backwards
    mp_uint_t slot = num_args;
    while (slot > 0) {
        slot -= 1;
        node->nodes[slot] = pop_result(parser);
    }

    push_result_node(parser, (mp_parse_node_t)node);
}

387
mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
388

389
    // initialise parser and allocate memory for its stacks
390

391
    parser_t parser;
392

393
    parser.had_memory_error = false;
394

395
    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
396
397
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien's avatar
Damien committed
398

399
    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
400
401
    parser.result_stack_top = 0;
    parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien's avatar
Damien committed
402

403
404
405
406
407
408
    parser.lexer = lex;

    // check if we could allocate the stacks
    if (parser.rule_stack == NULL || parser.result_stack == NULL) {
        goto memory_error;
    }
409

410
    // work out the top-level rule to use, and push it on the stack
411
    mp_uint_t top_level_rule;
Damien's avatar
Damien committed
412
    switch (input_kind) {
413
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien George's avatar
Damien George committed
414
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien's avatar
Damien committed
415
416
        default: top_level_rule = RULE_file_input;
    }
417
    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien's avatar
Damien committed
418

419
420
    // parse!

421
422
    mp_uint_t n, i; // state for the current rule
    mp_uint_t rule_src_line; // source line for the first token matched by the current rule
Damien's avatar
Damien committed
423
    bool backtrack = false;
424
    const rule_t *rule = NULL;
Damien's avatar
Damien committed
425
426
427

    for (;;) {
        next_rule:
428
        if (parser.rule_stack_top == 0 || parser.had_memory_error) {
Damien's avatar
Damien committed
429
430
431
            break;
        }

432
        pop_rule(&parser, &rule, &i, &rule_src_line);
Damien's avatar
Damien committed
433
434
435
436
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
437
438
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien's avatar
Damien committed
439
440
441
442
443
444
445
446
447
448
449
450
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    goto next_rule;
                } else {
                    backtrack = false;
                }
451
452
453
454
455
456
                for (; i < n; ++i) {
                    uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK;
                    if (kind == RULE_ARG_TOK) {
                        if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                            push_result_token(&parser);
                            mp_lexer_to_next(lex);
Damien's avatar
Damien committed
457
                            goto next_rule;
458
                        }
Damien's avatar
Damien committed
459
                    } else {
460
461
462
463
464
                        assert(kind == RULE_ARG_RULE);
                        if (i + 1 < n) {
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                        }
                        push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien's avatar
Damien committed
465
466
467
                        goto next_rule;
                    }
                }
468
                backtrack = true;
Damien's avatar
Damien committed
469
470
                break;

471
            case RULE_ACT_AND: {
Damien's avatar
Damien committed
472
473
474
475
476
477

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
478
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
494
                        case RULE_ARG_TOK: {
Damien's avatar
Damien committed
495
                            // need to match a token
496
                            mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
497
                            if (lex->tok_kind == tok_kind) {
Damien's avatar
Damien committed
498
                                // matched token
499
                                if (tok_kind == MP_TOKEN_NAME) {
500
                                    push_result_token(&parser);
Damien's avatar
Damien committed
501
                                }
502
                                mp_lexer_to_next(lex);
Damien's avatar
Damien committed
503
504
505
506
507
508
509
510
511
512
513
514
                            } else {
                                // failed to match token
                                if (i > 0) {
                                    // already eaten tokens so can't backtrack
                                    goto syntax_error;
                                } else {
                                    // this rule failed, so backtrack
                                    backtrack = true;
                                    goto next_rule;
                                }
                            }
                            break;
515
                        }
Damien's avatar
Damien committed
516
517
                        case RULE_ARG_RULE:
                        case RULE_ARG_OPT_RULE:
518
                        rule_and_no_other_choice:
519
520
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                            push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien's avatar
Damien committed
521
522
523
                            goto next_rule;
                        default:
                            assert(0);
524
                            goto rule_and_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
525
526
527
528
529
530
531
532
533
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                // count number of arguments for the parse_node
                i = 0;
534
                bool emit_rule = false;
535
                for (mp_uint_t x = 0; x < n; ++x) {
Damien's avatar
Damien committed
536
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
537
                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
538
                        if (tok_kind >= MP_TOKEN_NAME) {
Damien's avatar
Damien committed
539
540
                            emit_rule = true;
                        }
541
                        if (tok_kind == MP_TOKEN_NAME) {
Damien's avatar
Damien committed
542
543
544
545
546
547
548
549
550
                            // only tokens which were names are pushed to stack
                            i += 1;
                        }
                    } else {
                        // rules are always pushed
                        i += 1;
                    }
                }

551
                #if !MICROPY_ENABLE_DOC_STRING
552
                // this code discards lonely statements, such as doc strings
553
554
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
555
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
556
557
                        pop_result(&parser); // MP_PARSE_NODE_NULL
                        mp_parse_node_free(pop_result(&parser)); // RULE_string
558
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
559
560
561
                        break;
                    }
                }
562
                #endif
563

Damien's avatar
Damien committed
564
565
566
567
568
                // always emit these rules, even if they have only 1 argument
                if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
                    emit_rule = true;
                }

569
570
571
572
573
574
                // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
                // rule should not be emitted if it has only 1 argument
                // NOTE: can't set this flag for atom_paren because we need it
                // to distinguish, for example, [a,b] from [(a,b)]
                // TODO possibly set for: varargslist_name, varargslist_equal
                if (rule->act & RULE_ACT_ALLOW_IDENT) {
Damien's avatar
Damien committed
575
576
577
578
                    emit_rule = false;
                }

                // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
579
                if (ADD_BLANK_NODE(rule)) {
Damien's avatar
Damien committed
580
                    emit_rule = true;
581
                    push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien's avatar
Damien committed
582
583
584
                    i += 1;
                }

585
586
                mp_uint_t num_not_nil = 0;
                for (mp_uint_t x = 0; x < i; ++x) {
587
                    if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
588
589
590
591
                        num_not_nil += 1;
                    }
                }
                if (emit_rule) {
592
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
593
                } else if (num_not_nil == 0) {
594
                    push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien's avatar
Damien committed
595
596
                } else if (num_not_nil == 1) {
                    // single result, leave it on stack
597
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
598
                    for (mp_uint_t x = 0; x < i; ++x) {
599
                        mp_parse_node_t pn2 = pop_result(&parser);
600
                        if (pn2 != MP_PARSE_NODE_NULL) {
Damien's avatar
Damien committed
601
602
603
                            pn = pn2;
                        }
                    }
604
                    push_result_node(&parser, pn);
Damien's avatar
Damien committed
605
                } else {
606
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
607
608
                }
                break;
609
            }
Damien's avatar
Damien committed
610

611
            case RULE_ACT_LIST: {
Damien's avatar
Damien committed
612
613
614
                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
615
                bool had_trailing_sep;
Damien's avatar
Damien committed
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allowing trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
648
                        mp_uint_t arg = rule->arg[i & 1 & n];
Damien's avatar
Damien committed
649
650
                        switch (arg & RULE_ARG_KIND_MASK) {
                            case RULE_ARG_TOK:
651
                                if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
Damien's avatar
Damien committed
652
653
654
                                    if (i & 1 & n) {
                                        // separators which are tokens are not pushed to result stack
                                    } else {
655
                                        push_result_token(&parser);
Damien's avatar
Damien committed
656
                                    }
657
                                    mp_lexer_to_next(lex);
Damien's avatar
Damien committed
658
659
660
661
662
663
664
665
666
667
                                    // got element of list, so continue parsing list
                                    i += 1;
                                } else {
                                    // couldn't get element of list
                                    i += 1;
                                    backtrack = true;
                                    goto list_backtrack;
                                }
                                break;
                            case RULE_ARG_RULE:
668
                            rule_list_no_other_choice:
669
670
                                push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                                push_rule_from_arg(&parser, arg); // push child of list-rule
Damien's avatar
Damien committed
671
672
673
                                goto next_rule;
                            default:
                                assert(0);
674
                                goto rule_list_no_other_choice; // to help flow control analysis
Damien's avatar
Damien committed
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
691
                        push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
692
693
694
695
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
696
                    push_result_rule(&parser, rule_src_line, rule, i);
Damien's avatar
Damien committed
697
698
                }
                break;
699
            }
Damien's avatar
Damien committed
700
701
702
703
704

            default:
                assert(0);
        }
    }
705

706
707
    mp_obj_t exc;
    mp_parse_node_t result;
708
709

    // check if we had a memory error
710
711
    if (parser.had_memory_error) {
memory_error:
712
713
        exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
            "parser could not allocate enough memory");
714
        result = MP_PARSE_NODE_NULL;
715
716
717
        goto finished;
    }

718
    // check we are at the end of the token stream
719
    if (lex->tok_kind != MP_TOKEN_END) {
720
        goto syntax_error;
Damien's avatar
Damien committed
721
    }
722

723
724
725
726
727
    // check that parsing resulted in a parse node (can fail on empty input)
    if (parser.result_stack_top == 0) {
        goto syntax_error;
    }

Damien's avatar
Damien committed
728
    //result_stack_show(parser);
729
730
    //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
    //printf("result stack alloc: %d\n", parser.result_stack_alloc);
Damien's avatar
Damien committed
731
    //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
732
733

    // get the root parse node that we created
734
    assert(parser.result_stack_top == 1);
735
    exc = MP_OBJ_NULL;
736
    result = parser.result_stack[0];
737
738
739

finished:
    // free the memory that we don't need anymore
740
741
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
742
743
744
745
746
747
748
749
750
751
752
753
754
    // we also free the lexer on behalf of the caller (see below)

    if (exc != MP_OBJ_NULL) {
        // had an error so raise the exception
        // add traceback to give info about file name and location
        // we don't have a 'block' name, so just pass the NULL qstr to indicate this
        mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
        mp_lexer_free(lex);
        nlr_raise(exc);
    } else {
        mp_lexer_free(lex);
        return result;
    }
Damien's avatar
Damien committed
755
756

syntax_error:
757
    if (lex->tok_kind == MP_TOKEN_INDENT) {
758
759
        exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
            "unexpected indent");
760
    } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
761
762
        exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
            "unindent does not match any outer indentation level");
763
    } else {
764
765
        exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
            "invalid syntax");
Damien's avatar
Damien committed
766
#ifdef USE_RULE_NAME
767
        // debugging: print the rule name that failed and the token
768
769
        printf("rule: %s\n", rule->rule_name);
#if MICROPY_DEBUG_PRINTERS
770
        mp_lexer_show_token(lex);
771
#endif
772
#endif
773
    }
774
    result = MP_PARSE_NODE_NULL;
775
    goto finished;
Damien's avatar
Damien committed
776
}