Commit 5bdf1650 authored by Damien George
Browse files

py/lexer: Make lexer use an mp_reader as its source.

parent 66d955c2
...@@ -35,10 +35,11 @@ typedef struct _mp_lexer_str32_buf_t { ...@@ -35,10 +35,11 @@ typedef struct _mp_lexer_str32_buf_t {
uint8_t byte_off; uint8_t byte_off;
} mp_lexer_str32_buf_t; } mp_lexer_str32_buf_t;
STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) { STATIC mp_uint_t str32_buf_next_byte(void *sb_in) {
mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
byte c = sb->val & 0xff; byte c = sb->val & 0xff;
if (c == 0) { if (c == 0) {
return MP_LEXER_EOF; return MP_READER_EOF;
} }
if (++sb->byte_off > 3) { if (++sb->byte_off > 3) {
...@@ -51,7 +52,8 @@ STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) { ...@@ -51,7 +52,8 @@ STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
return c; return c;
} }
STATIC void str32_buf_free(mp_lexer_str32_buf_t *sb) { STATIC void str32_buf_free(void *sb_in) {
mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
m_del_obj(mp_lexer_str32_buf_t, sb); m_del_obj(mp_lexer_str32_buf_t, sb);
} }
...@@ -63,7 +65,8 @@ mp_lexer_t *mp_lexer_new_from_str32(qstr src_name, const char *str, mp_uint_t le ...@@ -63,7 +65,8 @@ mp_lexer_t *mp_lexer_new_from_str32(qstr src_name, const char *str, mp_uint_t le
sb->byte_off = (uint32_t)str & 3; sb->byte_off = (uint32_t)str & 3;
sb->src_cur = (uint32_t*)(str - sb->byte_off); sb->src_cur = (uint32_t*)(str - sb->byte_off);
sb->val = *sb->src_cur++ >> sb->byte_off * 8; sb->val = *sb->src_cur++ >> sb->byte_off * 8;
return mp_lexer_new(src_name, sb, (mp_lexer_stream_next_byte_t)str32_buf_next_byte, (mp_lexer_stream_close_t)str32_buf_free); mp_reader_t reader = {sb, str32_buf_next_byte, str32_buf_free};
return mp_lexer_new(src_name, reader);
} }
#endif // MICROPY_ENABLE_COMPILER #endif // MICROPY_ENABLE_COMPILER
...@@ -52,6 +52,7 @@ STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { ...@@ -52,6 +52,7 @@ STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
return i == len && *str == 0; return i == len && *str == 0;
} }
#define MP_LEXER_EOF ((unichar)MP_READER_EOF)
#define CUR_CHAR(lex) ((lex)->chr0) #define CUR_CHAR(lex) ((lex)->chr0)
STATIC bool is_end(mp_lexer_t *lex) { STATIC bool is_end(mp_lexer_t *lex) {
...@@ -145,7 +146,7 @@ STATIC void next_char(mp_lexer_t *lex) { ...@@ -145,7 +146,7 @@ STATIC void next_char(mp_lexer_t *lex) {
lex->chr0 = lex->chr1; lex->chr0 = lex->chr1;
lex->chr1 = lex->chr2; lex->chr1 = lex->chr2;
lex->chr2 = lex->stream_next_byte(lex->stream_data); lex->chr2 = lex->reader.readbyte(lex->reader.data);
if (lex->chr0 == '\r') { if (lex->chr0 == '\r') {
// CR is a new line, converted to LF // CR is a new line, converted to LF
...@@ -153,7 +154,7 @@ STATIC void next_char(mp_lexer_t *lex) { ...@@ -153,7 +154,7 @@ STATIC void next_char(mp_lexer_t *lex) {
if (lex->chr1 == '\n') { if (lex->chr1 == '\n') {
// CR LF is a single new line // CR LF is a single new line
lex->chr1 = lex->chr2; lex->chr1 = lex->chr2;
lex->chr2 = lex->stream_next_byte(lex->stream_data); lex->chr2 = lex->reader.readbyte(lex->reader.data);
} }
} }
...@@ -689,21 +690,17 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) { ...@@ -689,21 +690,17 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
} }
} }
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close) { mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
mp_lexer_t *lex = m_new_obj_maybe(mp_lexer_t); mp_lexer_t *lex = m_new_obj_maybe(mp_lexer_t);
// check for memory allocation error // check for memory allocation error
if (lex == NULL) { if (lex == NULL) {
if (stream_close) { reader.close(reader.data);
stream_close(stream_data);
}
return NULL; return NULL;
} }
lex->source_name = src_name; lex->source_name = src_name;
lex->stream_data = stream_data; lex->reader = reader;
lex->stream_next_byte = stream_next_byte;
lex->stream_close = stream_close;
lex->line = 1; lex->line = 1;
lex->column = 1; lex->column = 1;
lex->emit_dent = 0; lex->emit_dent = 0;
...@@ -724,9 +721,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_ ...@@ -724,9 +721,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
lex->indent_level[0] = 0; lex->indent_level[0] = 0;
// preload characters // preload characters
lex->chr0 = stream_next_byte(stream_data); lex->chr0 = reader.readbyte(reader.data);
lex->chr1 = stream_next_byte(stream_data); lex->chr1 = reader.readbyte(reader.data);
lex->chr2 = stream_next_byte(stream_data); lex->chr2 = reader.readbyte(reader.data);
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end // if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
if (lex->chr0 == MP_LEXER_EOF) { if (lex->chr0 == MP_LEXER_EOF) {
...@@ -756,7 +753,7 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t ...@@ -756,7 +753,7 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t
if (!mp_reader_new_mem(&reader, (const byte*)str, len, free_len)) { if (!mp_reader_new_mem(&reader, (const byte*)str, len, free_len)) {
return NULL; return NULL;
} }
return mp_lexer_new(src_name, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close); return mp_lexer_new(src_name, reader);
} }
#if MICROPY_READER_POSIX || MICROPY_READER_FATFS #if MICROPY_READER_POSIX || MICROPY_READER_FATFS
...@@ -767,7 +764,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) { ...@@ -767,7 +764,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
if (ret != 0) { if (ret != 0) {
return NULL; return NULL;
} }
return mp_lexer_new(qstr_from_str(filename), reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close); return mp_lexer_new(qstr_from_str(filename), reader);
} }
#if MICROPY_HELPER_LEXER_UNIX #if MICROPY_HELPER_LEXER_UNIX
...@@ -778,7 +775,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) { ...@@ -778,7 +775,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
if (ret != 0) { if (ret != 0) {
return NULL; return NULL;
} }
return mp_lexer_new(filename, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close); return mp_lexer_new(filename, reader);
} }
#endif #endif
...@@ -787,9 +784,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) { ...@@ -787,9 +784,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
void mp_lexer_free(mp_lexer_t *lex) { void mp_lexer_free(mp_lexer_t *lex) {
if (lex) { if (lex) {
if (lex->stream_close) { lex->reader.close(lex->reader.data);
lex->stream_close(lex->stream_data);
}
vstr_clear(&lex->vstr); vstr_clear(&lex->vstr);
m_del(uint16_t, lex->indent_level, lex->alloc_indent_level); m_del(uint16_t, lex->indent_level, lex->alloc_indent_level);
m_del_obj(mp_lexer_t, lex); m_del_obj(mp_lexer_t, lex);
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "py/mpconfig.h" #include "py/mpconfig.h"
#include "py/qstr.h" #include "py/qstr.h"
#include "py/reader.h"
/* lexer.h -- simple tokeniser for Micro Python /* lexer.h -- simple tokeniser for Micro Python
* *
...@@ -142,21 +143,11 @@ typedef enum _mp_token_kind_t { ...@@ -142,21 +143,11 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_MINUS_MORE, MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t; } mp_token_kind_t;
// the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
#define MP_LEXER_EOF ((unichar)(-1))
typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
typedef void (*mp_lexer_stream_close_t)(void*);
// this data structure is exposed for efficiency // this data structure is exposed for efficiency
// public members are: source_name, tok_line, tok_column, tok_kind, vstr // public members are: source_name, tok_line, tok_column, tok_kind, vstr
typedef struct _mp_lexer_t { typedef struct _mp_lexer_t {
qstr source_name; // name of source qstr source_name; // name of source
void *stream_data; // data for stream mp_reader_t reader; // stream source
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
mp_lexer_stream_close_t stream_close; // stream callback to free
unichar chr0, chr1, chr2; // current cached characters from source unichar chr0, chr1, chr2; // current cached characters from source
...@@ -176,7 +167,7 @@ typedef struct _mp_lexer_t { ...@@ -176,7 +167,7 @@ typedef struct _mp_lexer_t {
vstr_t vstr; // token data vstr_t vstr; // token data
} mp_lexer_t; } mp_lexer_t;
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close); mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len); mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
void mp_lexer_free(mp_lexer_t *lex); void mp_lexer_free(mp_lexer_t *lex);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment