/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
26
27
#ifndef __MICROPY_INCLUDED_PY_LEXER_H__
#define __MICROPY_INCLUDED_PY_LEXER_H__
28

29
30
31
32
#include <stdint.h>

#include "py/mpconfig.h"
#include "py/qstr.h"
33
#include "py/reader.h"
34

35
36
37
38
/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses an explicit (byte) length instead of null termination.
 * Tokens are stored the same way: UTF-8 with a (byte) length.
 */

41
42
43
44
45
46
// Kinds of token reported by the lexer (stored in mp_lexer_t.tok_kind).
//
// Enumerators rely on implicit sequential numbering starting at 0, so the
// order below is significant and must not be changed.
//
// The trailing "// N" annotations give the numeric value of selected
// entries.  MP_TOKEN_KW_ASYNC and MP_TOKEN_KW_AWAIT exist only when
// MICROPY_PY_ASYNC_AWAIT is enabled, which shifts every later entry by 2;
// for those entries both values are given as "N (N+2 with async/await)".
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_INTEGER,
    MP_TOKEN_FLOAT_OR_IMAG,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // MP_TOKEN_KW_* entries
    MP_TOKEN_KW_FALSE,              // 14
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    #if MICROPY_PY_ASYNC_AWAIT
    MP_TOKEN_KW_ASYNC,
    MP_TOKEN_KW_AWAIT,
    #endif
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 23 (25 with async/await)
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 33 (35 with async/await)
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 43 (45 with async/await)
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // MP_TOKEN_OP_* entries
    MP_TOKEN_OP_PLUS,               // 47 (49 with async/await)
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 57 (59 with async/await)
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // MP_TOKEN_DEL_* entries
    MP_TOKEN_DEL_PAREN_OPEN,        // 66 (68 with async/await)
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 76 (78 with async/await)
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 86 (88 with async/await)
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;

146
147
148
149
// this data structure is exposed for efficiency
// public members are: source_name, tok_line, tok_column, tok_kind, vstr
typedef struct _mp_lexer_t {
    qstr source_name;           // name of source
150
    mp_reader_t reader;         // stream source
151
152
153
154
155

    unichar chr0, chr1, chr2;   // current cached characters from source

    mp_uint_t line;             // current source line
    mp_uint_t column;           // current source column
156

157
158
159
160
161
162
163
164
165
166
167
168
    mp_int_t emit_dent;             // non-zero when there are INDENT/DEDENT tokens to emit
    mp_int_t nested_bracket_level;  // >0 when there are nested brackets over multiple lines

    mp_uint_t alloc_indent_level;
    mp_uint_t num_indent_level;
    uint16_t *indent_level;

    mp_uint_t tok_line;         // token source line
    mp_uint_t tok_column;       // token source column
    mp_token_kind_t tok_kind;   // token kind
    vstr_t vstr;                // token data
} mp_lexer_t;
169

170
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
171
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
172

173
174
void mp_lexer_free(mp_lexer_t *lex);
void mp_lexer_to_next(mp_lexer_t *lex);
175
void mp_lexer_show_token(const mp_lexer_t *lex);
176

177
178
179
180
181
182
183
184
185
186
187
188
189
190
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);

// Return type of mp_import_stat().
// NOTE(review): judging by the names, the values distinguish a path that does
// not exist, a directory, and a regular file -- confirm against a port's
// implementation.
typedef enum {
    MP_IMPORT_STAT_NO_EXIST = 0,
    MP_IMPORT_STAT_DIR = 1,
    MP_IMPORT_STAT_FILE = 2,
} mp_import_stat_t;

// Platform-specific functions; each port must provide an implementation.
// mp_import_stat takes a path and returns an mp_import_stat_t for it.
// mp_lexer_new_from_file takes a filename and returns a lexer.
// NOTE(review): failure behaviour (e.g. missing file) is not visible in this
// header -- confirm against the port implementations.
mp_import_stat_t mp_import_stat(const char *path);
mp_lexer_t *mp_lexer_new_from_file(const char *filename);
191

192
193
194
195
// Declared only when MICROPY_HELPER_LEXER_UNIX is enabled: creates a lexer
// from a file descriptor.
// NOTE(review): presumably `close_fd` selects whether the lexer closes `fd`
// when it is finished with it -- confirm against the implementation.
#if MICROPY_HELPER_LEXER_UNIX
mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd);
#endif

196
#endif // __MICROPY_INCLUDED_PY_LEXER_H__