Commit a4c52c5a authored by Damien George's avatar Damien George
Browse files

py: Optimise lexer by exposing lexer type.

mp_lexer_t type is exposed, mp_token_t type is removed, and simple lexer
functions (like checking current token kind) are now inlined.

This saves 784 bytes ROM on 32-bit unix, 348 bytes on stmhal, and 460
bytes on bare-arm.  It also saves a tiny bit of RAM since mp_lexer_t
is a bit smaller.  Also will run a bit more efficiently.
parent 41c07d5b
...@@ -32,7 +32,7 @@ void do_str(const char *src) { ...@@ -32,7 +32,7 @@ void do_str(const char *src) {
} }
// parse okay // parse okay
qstr source_name = mp_lexer_source_name(lex); qstr source_name = lex->source_name;
mp_lexer_free(lex); mp_lexer_free(lex);
mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true); mp_obj_t module_fun = mp_compile(pn, source_name, MP_EMIT_OPT_NONE, true);
......
...@@ -127,7 +127,7 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) { ...@@ -127,7 +127,7 @@ STATIC void do_load(mp_obj_t module_obj, vstr_t *file) {
} }
#if MICROPY_PY___FILE__ #if MICROPY_PY___FILE__
qstr source_name = mp_lexer_source_name(lex); qstr source_name = lex->source_name;
mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name)); mp_store_attr(module_obj, MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));
#endif #endif
......
...@@ -42,32 +42,10 @@ ...@@ -42,32 +42,10 @@
// TODO seems that CPython allows NULL byte in the input stream // TODO seems that CPython allows NULL byte in the input stream
// don't know if that's intentional or not, but we don't allow it // don't know if that's intentional or not, but we don't allow it
struct _mp_lexer_t {
qstr source_name; // name of source
void *stream_data; // data for stream
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
mp_lexer_stream_close_t stream_close; // stream callback to free
unichar chr0, chr1, chr2; // current cached characters from source
mp_uint_t line; // source line
mp_uint_t column; // source column
mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
mp_uint_t alloc_indent_level;
mp_uint_t num_indent_level;
uint16_t *indent_level;
vstr_t vstr;
mp_token_t tok_cur;
};
mp_uint_t mp_optimise_value; mp_uint_t mp_optimise_value;
// TODO replace with a call to a standard function // TODO replace with a call to a standard function
bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
mp_uint_t i = 0; mp_uint_t i = 0;
while (i < len && *str == *strn) { while (i < len && *str == *strn) {
...@@ -79,27 +57,6 @@ bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { ...@@ -79,27 +57,6 @@ bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
return i == len && *str == 0; return i == len && *str == 0;
} }
#ifdef MICROPY_DEBUG_PRINTERS
void mp_token_show(const mp_token_t *tok) {
printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:" UINT_FMT, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len);
if (tok->str != NULL && tok->len > 0) {
const byte *i = (const byte *)tok->str;
const byte *j = (const byte *)i + tok->len;
printf(" ");
while (i < j) {
unichar c = utf8_get_char(i);
i = utf8_next_char(i);
if (unichar_isprint(c)) {
printf("%c", c);
} else {
printf("?");
}
}
}
printf("\n");
}
#endif
#define CUR_CHAR(lex) ((lex)->chr0) #define CUR_CHAR(lex) ((lex)->chr0)
STATIC bool is_end(mp_lexer_t *lex) { STATIC bool is_end(mp_lexer_t *lex) {
...@@ -210,7 +167,7 @@ STATIC void next_char(mp_lexer_t *lex) { ...@@ -210,7 +167,7 @@ STATIC void next_char(mp_lexer_t *lex) {
} }
} }
void indent_push(mp_lexer_t *lex, mp_uint_t indent) { STATIC void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
if (lex->num_indent_level >= lex->alloc_indent_level) { if (lex->num_indent_level >= lex->alloc_indent_level) {
// TODO use m_renew_maybe and somehow indicate an error if it fails... probably by using MP_TOKEN_MEMORY_ERROR // TODO use m_renew_maybe and somehow indicate an error if it fails... probably by using MP_TOKEN_MEMORY_ERROR
lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC); lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC);
...@@ -219,11 +176,11 @@ void indent_push(mp_lexer_t *lex, mp_uint_t indent) { ...@@ -219,11 +176,11 @@ void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
lex->indent_level[lex->num_indent_level++] = indent; lex->indent_level[lex->num_indent_level++] = indent;
} }
mp_uint_t indent_top(mp_lexer_t *lex) { STATIC mp_uint_t indent_top(mp_lexer_t *lex) {
return lex->indent_level[lex->num_indent_level - 1]; return lex->indent_level[lex->num_indent_level - 1];
} }
void indent_pop(mp_lexer_t *lex) { STATIC void indent_pop(mp_lexer_t *lex) {
lex->num_indent_level -= 1; lex->num_indent_level -= 1;
} }
...@@ -335,7 +292,10 @@ STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) { ...@@ -335,7 +292,10 @@ STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) {
return true; return true;
} }
STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) { STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
// start new token text
vstr_reset(&lex->vstr);
// skip white space and comments // skip white space and comments
bool had_physical_newline = false; bool had_physical_newline = false;
while (!is_end(lex)) { while (!is_end(lex)) {
...@@ -355,12 +315,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -355,12 +315,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
next_char(lex); next_char(lex);
if (!is_physical_newline(lex)) { if (!is_physical_newline(lex)) {
// SyntaxError: unexpected character after line continuation character // SyntaxError: unexpected character after line continuation character
tok->src_line = lex->line; lex->tok_line = lex->line;
tok->src_column = lex->column; lex->tok_column = lex->column;
tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION; lex->tok_kind = MP_TOKEN_BAD_LINE_CONTINUATION;
vstr_reset(&lex->vstr);
tok->str = vstr_str(&lex->vstr);
tok->len = 0;
return; return;
} else { } else {
next_char(lex); next_char(lex);
...@@ -371,29 +328,26 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -371,29 +328,26 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
} }
// set token source information // set token source information
tok->src_line = lex->line; lex->tok_line = lex->line;
tok->src_column = lex->column; lex->tok_column = lex->column;
// start new token text
vstr_reset(&lex->vstr);
if (first_token && lex->line == 1 && lex->column != 1) { if (first_token && lex->line == 1 && lex->column != 1) {
// check that the first token is in the first column // check that the first token is in the first column
// if first token is not on first line, we get a physical newline and // if first token is not on first line, we get a physical newline and
// this check is done as part of normal indent/dedent checking below // this check is done as part of normal indent/dedent checking below
// (done to get equivalence with CPython) // (done to get equivalence with CPython)
tok->kind = MP_TOKEN_INDENT; lex->tok_kind = MP_TOKEN_INDENT;
} else if (lex->emit_dent < 0) { } else if (lex->emit_dent < 0) {
tok->kind = MP_TOKEN_DEDENT; lex->tok_kind = MP_TOKEN_DEDENT;
lex->emit_dent += 1; lex->emit_dent += 1;
} else if (lex->emit_dent > 0) { } else if (lex->emit_dent > 0) {
tok->kind = MP_TOKEN_INDENT; lex->tok_kind = MP_TOKEN_INDENT;
lex->emit_dent -= 1; lex->emit_dent -= 1;
} else if (had_physical_newline && lex->nested_bracket_level == 0) { } else if (had_physical_newline && lex->nested_bracket_level == 0) {
tok->kind = MP_TOKEN_NEWLINE; lex->tok_kind = MP_TOKEN_NEWLINE;
mp_uint_t num_spaces = lex->column - 1; mp_uint_t num_spaces = lex->column - 1;
lex->emit_dent = 0; lex->emit_dent = 0;
...@@ -407,20 +361,20 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -407,20 +361,20 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
lex->emit_dent -= 1; lex->emit_dent -= 1;
} }
if (num_spaces != indent_top(lex)) { if (num_spaces != indent_top(lex)) {
tok->kind = MP_TOKEN_DEDENT_MISMATCH; lex->tok_kind = MP_TOKEN_DEDENT_MISMATCH;
} }
} }
} else if (is_end(lex)) { } else if (is_end(lex)) {
if (indent_top(lex) > 0) { if (indent_top(lex) > 0) {
tok->kind = MP_TOKEN_NEWLINE; lex->tok_kind = MP_TOKEN_NEWLINE;
lex->emit_dent = 0; lex->emit_dent = 0;
while (indent_top(lex) > 0) { while (indent_top(lex) > 0) {
indent_pop(lex); indent_pop(lex);
lex->emit_dent -= 1; lex->emit_dent -= 1;
} }
} else { } else {
tok->kind = MP_TOKEN_END; lex->tok_kind = MP_TOKEN_END;
} }
} else if (is_char_or(lex, '\'', '\"') } else if (is_char_or(lex, '\'', '\"')
...@@ -451,9 +405,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -451,9 +405,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
// set token kind // set token kind
if (is_bytes) { if (is_bytes) {
tok->kind = MP_TOKEN_BYTES; lex->tok_kind = MP_TOKEN_BYTES;
} else { } else {
tok->kind = MP_TOKEN_STRING; lex->tok_kind = MP_TOKEN_STRING;
} }
// get first quoting character // get first quoting character
...@@ -566,14 +520,14 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -566,14 +520,14 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
// check we got the required end quotes // check we got the required end quotes
if (n_closing < num_quotes) { if (n_closing < num_quotes) {
tok->kind = MP_TOKEN_LONELY_STRING_OPEN; lex->tok_kind = MP_TOKEN_LONELY_STRING_OPEN;
} }
// cut off the end quotes from the token text // cut off the end quotes from the token text
vstr_cut_tail_bytes(&lex->vstr, n_closing); vstr_cut_tail_bytes(&lex->vstr, n_closing);
} else if (is_head_of_identifier(lex)) { } else if (is_head_of_identifier(lex)) {
tok->kind = MP_TOKEN_NAME; lex->tok_kind = MP_TOKEN_NAME;
// get first char // get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex)); vstr_add_char(&lex->vstr, CUR_CHAR(lex));
...@@ -586,7 +540,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -586,7 +540,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
} }
} else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) { } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
tok->kind = MP_TOKEN_NUMBER; lex->tok_kind = MP_TOKEN_NUMBER;
// get first char // get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex)); vstr_add_char(&lex->vstr, CUR_CHAR(lex));
...@@ -621,9 +575,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -621,9 +575,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
vstr_add_char(&lex->vstr, '.'); vstr_add_char(&lex->vstr, '.');
next_char(lex); next_char(lex);
next_char(lex); next_char(lex);
tok->kind = MP_TOKEN_ELLIPSIS; lex->tok_kind = MP_TOKEN_ELLIPSIS;
} else { } else {
tok->kind = MP_TOKEN_DEL_PERIOD; lex->tok_kind = MP_TOKEN_DEL_PERIOD;
} }
} else { } else {
...@@ -645,7 +599,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -645,7 +599,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
if (*t == 0) { if (*t == 0) {
// didn't match any delimiter or operator characters // didn't match any delimiter or operator characters
tok->kind = MP_TOKEN_INVALID; lex->tok_kind = MP_TOKEN_INVALID;
} else { } else {
// matched a delimiter or operator character // matched a delimiter or operator character
...@@ -670,7 +624,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -670,7 +624,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
next_char(lex); next_char(lex);
tok_enc_index = t_index; tok_enc_index = t_index;
} else { } else {
tok->kind = MP_TOKEN_INVALID; lex->tok_kind = MP_TOKEN_INVALID;
goto tok_enc_no_match; goto tok_enc_no_match;
} }
break; break;
...@@ -692,37 +646,33 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -692,37 +646,33 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
} }
// set token kind // set token kind
tok->kind = tok_enc_kind[tok_enc_index]; lex->tok_kind = tok_enc_kind[tok_enc_index];
tok_enc_no_match: tok_enc_no_match:
// compute bracket level for implicit line joining // compute bracket level for implicit line joining
if (tok->kind == MP_TOKEN_DEL_PAREN_OPEN || tok->kind == MP_TOKEN_DEL_BRACKET_OPEN || tok->kind == MP_TOKEN_DEL_BRACE_OPEN) { if (lex->tok_kind == MP_TOKEN_DEL_PAREN_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACKET_OPEN || lex->tok_kind == MP_TOKEN_DEL_BRACE_OPEN) {
lex->nested_bracket_level += 1; lex->nested_bracket_level += 1;
} else if (tok->kind == MP_TOKEN_DEL_PAREN_CLOSE || tok->kind == MP_TOKEN_DEL_BRACKET_CLOSE || tok->kind == MP_TOKEN_DEL_BRACE_CLOSE) { } else if (lex->tok_kind == MP_TOKEN_DEL_PAREN_CLOSE || lex->tok_kind == MP_TOKEN_DEL_BRACKET_CLOSE || lex->tok_kind == MP_TOKEN_DEL_BRACE_CLOSE) {
lex->nested_bracket_level -= 1; lex->nested_bracket_level -= 1;
} }
} }
} }
// point token text to vstr buffer
tok->str = vstr_str(&lex->vstr);
tok->len = vstr_len(&lex->vstr);
// check for keywords // check for keywords
if (tok->kind == MP_TOKEN_NAME) { if (lex->tok_kind == MP_TOKEN_NAME) {
// We check for __debug__ here and convert it to its value. This is so // We check for __debug__ here and convert it to its value. This is so
// the parser gives a syntax error on, eg, x.__debug__. Otherwise, we // the parser gives a syntax error on, eg, x.__debug__. Otherwise, we
// need to check for this special token in many places in the compiler. // need to check for this special token in many places in the compiler.
// TODO improve speed of these string comparisons // TODO improve speed of these string comparisons
//for (mp_int_t i = 0; tok_kw[i] != NULL; i++) { //for (mp_int_t i = 0; tok_kw[i] != NULL; i++) {
for (mp_int_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) { for (mp_int_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
if (str_strn_equal(tok_kw[i], tok->str, tok->len)) { if (str_strn_equal(tok_kw[i], lex->vstr.buf, lex->vstr.len)) {
if (i == MP_ARRAY_SIZE(tok_kw) - 1) { if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
// tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__" // tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
tok->kind = (mp_optimise_value == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE); lex->tok_kind = (mp_optimise_value == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
} else { } else {
tok->kind = MP_TOKEN_KW_FALSE + i; lex->tok_kind = MP_TOKEN_KW_FALSE + i;
} }
break; break;
} }
...@@ -782,7 +732,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_ ...@@ -782,7 +732,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
} }
// preload first token // preload first token
mp_lexer_next_token_into(lex, &lex->tok_cur, true); mp_lexer_next_token_into(lex, true);
return lex; return lex;
} }
...@@ -798,18 +748,27 @@ void mp_lexer_free(mp_lexer_t *lex) { ...@@ -798,18 +748,27 @@ void mp_lexer_free(mp_lexer_t *lex) {
} }
} }
qstr mp_lexer_source_name(mp_lexer_t *lex) {
return lex->source_name;
}
void mp_lexer_to_next(mp_lexer_t *lex) { void mp_lexer_to_next(mp_lexer_t *lex) {
mp_lexer_next_token_into(lex, &lex->tok_cur, false); mp_lexer_next_token_into(lex, false);
}
const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex) {
return &lex->tok_cur;
} }
bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind) { #if MICROPY_DEBUG_PRINTERS
return lex->tok_cur.kind == kind; void mp_lexer_show_token(const mp_lexer_t *lex) {
printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:%u", lex->tok_line, lex->tok_column, lex->tok_kind, lex->vstr.buf, lex->vstr.len);
if (lex->vstr.len > 0) {
const byte *i = (const byte *)lex->vstr.buf;
const byte *j = (const byte *)i + lex->vstr.len;
printf(" ");
while (i < j) {
unichar c = utf8_get_char(i);
i = utf8_next_char(i);
if (unichar_isprint(c)) {
printf("%c", c);
} else {
printf("?");
}
}
}
printf("\n");
} }
#endif
...@@ -130,15 +130,6 @@ typedef enum _mp_token_kind_t { ...@@ -130,15 +130,6 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_MINUS_MORE, MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t; } mp_token_kind_t;
typedef struct _mp_token_t {
mp_uint_t src_line; // source line
mp_uint_t src_column; // source column
mp_token_kind_t kind; // kind of token
const char *str; // string of token (valid only while this token is current token)
mp_uint_t len; // (byte) length of string of token
} mp_token_t;
// the next-byte function must return the next byte in the stream // the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream // it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF // it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
...@@ -146,21 +137,38 @@ typedef struct _mp_token_t { ...@@ -146,21 +137,38 @@ typedef struct _mp_token_t {
typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*); typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
typedef void (*mp_lexer_stream_close_t)(void*); typedef void (*mp_lexer_stream_close_t)(void*);
typedef struct _mp_lexer_t mp_lexer_t; // this data structure is exposed for efficiency
// public members are: source_name, tok_line, tok_column, tok_kind, vstr
typedef struct _mp_lexer_t {
qstr source_name; // name of source
void *stream_data; // data for stream
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
mp_lexer_stream_close_t stream_close; // stream callback to free
unichar chr0, chr1, chr2; // current cached characters from source
mp_uint_t line; // current source line
mp_uint_t column; // current source column
void mp_token_show(const mp_token_t *tok); mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
mp_uint_t alloc_indent_level;
mp_uint_t num_indent_level;
uint16_t *indent_level;
mp_uint_t tok_line; // token source line
mp_uint_t tok_column; // token source column
mp_token_kind_t tok_kind; // token kind
vstr_t vstr; // token data
} mp_lexer_t;
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close); mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len); mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
void mp_lexer_free(mp_lexer_t *lex); void mp_lexer_free(mp_lexer_t *lex);
qstr mp_lexer_source_name(mp_lexer_t *lex);
void mp_lexer_to_next(mp_lexer_t *lex); void mp_lexer_to_next(mp_lexer_t *lex);
const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex); void mp_lexer_show_token(const mp_lexer_t *lex);
bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex);
bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);
/******************************************************************/ /******************************************************************/
// platform specific import function; must be implemented for a specific port // platform specific import function; must be implemented for a specific port
......
...@@ -24,6 +24,8 @@ ...@@ -24,6 +24,8 @@
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <stdint.h>
#include "mpconfig.h" #include "mpconfig.h"
#include "misc.h" #include "misc.h"
#include "qstr.h" #include "qstr.h"
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#if MICROPY_HELPER_LEXER_UNIX #if MICROPY_HELPER_LEXER_UNIX
#include <stdio.h> #include <stdio.h>
#include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
#include <sys/stat.h> #include <sys/stat.h>
......
...@@ -155,7 +155,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) { ...@@ -155,7 +155,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE); assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK; mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK;
assert(rule_id < RULE_maximum_number_of); assert(rule_id < RULE_maximum_number_of);
push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0); push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
} }
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) { STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
...@@ -298,17 +298,17 @@ STATIC void push_result_string(parser_t *parser, mp_uint_t src_line, const char ...@@ -298,17 +298,17 @@ STATIC void push_result_string(parser_t *parser, mp_uint_t src_line, const char
push_result_node(parser, (mp_parse_node_t)pn); push_result_node(parser, (mp_parse_node_t)pn);
} }
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) { STATIC void push_result_token(parser_t *parser) {
const mp_token_t *tok = mp_lexer_cur(lex);
mp_parse_node_t pn; mp_parse_node_t pn;
if (tok->kind == MP_TOKEN_NAME) { mp_lexer_t *lex = parser->lexer;
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len)); if (lex->tok_kind == MP_TOKEN_NAME) {
} else if (tok->kind == MP_TOKEN_NUMBER) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
} else if (lex->tok_kind == MP_TOKEN_NUMBER) {
bool dec = false; bool dec = false;
bool small_int = true; bool small_int = true;
mp_int_t int_val = 0; mp_int_t int_val = 0;
mp_uint_t