Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
TASTE
uPython-mirror
Commits
69a818d4
Commit
69a818d4
authored
Jan 12, 2014
by
Damien George
Browse files
py: Improve memory management for parser; add lexer error for bad line cont.
parent
97eb73cf
Changes
3
Show whitespace changes
Inline
Side-by-side
py/lexer.c
View file @
69a818d4
...
...
@@ -299,8 +299,15 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
// backslash (outside string literals) must appear just before a physical newline
next_char
(
lex
);
if
(
!
is_physical_newline
(
lex
))
{
// TODO SyntaxError
assert
(
0
);
// SyntaxError: unexpected character after line continuation character
tok
->
src_name
=
lex
->
name
;
tok
->
src_line
=
lex
->
line
;
tok
->
src_column
=
lex
->
column
;
tok
->
kind
=
MP_TOKEN_BAD_LINE_CONTINUATION
;
vstr_reset
(
&
lex
->
vstr
);
tok
->
str
=
vstr_str
(
&
lex
->
vstr
);
tok
->
len
=
0
;
return
;
}
else
{
next_char
(
lex
);
}
...
...
py/lexer.h
View file @
69a818d4
...
...
@@ -10,19 +10,20 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_INVALID
,
MP_TOKEN_DEDENT_MISMATCH
,
MP_TOKEN_LONELY_STRING_OPEN
,
MP_TOKEN_BAD_LINE_CONTINUATION
,
MP_TOKEN_NEWLINE
,
//
4
MP_TOKEN_INDENT
,
//
5
MP_TOKEN_DEDENT
,
//
6
MP_TOKEN_NEWLINE
,
//
5
MP_TOKEN_INDENT
,
//
6
MP_TOKEN_DEDENT
,
//
7
MP_TOKEN_NAME
,
//
7
MP_TOKEN_NAME
,
//
8
MP_TOKEN_NUMBER
,
MP_TOKEN_STRING
,
MP_TOKEN_BYTES
,
MP_TOKEN_ELLIPSIS
,
MP_TOKEN_KW_FALSE
,
// 1
2
MP_TOKEN_KW_FALSE
,
// 1
3
MP_TOKEN_KW_NONE
,
MP_TOKEN_KW_TRUE
,
MP_TOKEN_KW_AND
,
...
...
@@ -31,7 +32,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_BREAK
,
MP_TOKEN_KW_CLASS
,
MP_TOKEN_KW_CONTINUE
,
MP_TOKEN_KW_DEF
,
// 2
1
MP_TOKEN_KW_DEF
,
// 2
2
MP_TOKEN_KW_DEL
,
MP_TOKEN_KW_ELIF
,
MP_TOKEN_KW_ELSE
,
...
...
@@ -41,7 +42,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_FROM
,
MP_TOKEN_KW_GLOBAL
,
MP_TOKEN_KW_IF
,
MP_TOKEN_KW_IMPORT
,
// 3
1
MP_TOKEN_KW_IMPORT
,
// 3
2
MP_TOKEN_KW_IN
,
MP_TOKEN_KW_IS
,
MP_TOKEN_KW_LAMBDA
,
...
...
@@ -51,12 +52,12 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_PASS
,
MP_TOKEN_KW_RAISE
,
MP_TOKEN_KW_RETURN
,
MP_TOKEN_KW_TRY
,
// 4
1
MP_TOKEN_KW_TRY
,
// 4
2
MP_TOKEN_KW_WHILE
,
MP_TOKEN_KW_WITH
,
MP_TOKEN_KW_YIELD
,
MP_TOKEN_OP_PLUS
,
// 4
5
MP_TOKEN_OP_PLUS
,
// 4
6
MP_TOKEN_OP_MINUS
,
MP_TOKEN_OP_STAR
,
MP_TOKEN_OP_DBL_STAR
,
...
...
@@ -66,7 +67,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_OP_LESS
,
MP_TOKEN_OP_DBL_LESS
,
MP_TOKEN_OP_MORE
,
MP_TOKEN_OP_DBL_MORE
,
// 5
5
MP_TOKEN_OP_DBL_MORE
,
// 5
6
MP_TOKEN_OP_AMPERSAND
,
MP_TOKEN_OP_PIPE
,
MP_TOKEN_OP_CARET
,
...
...
@@ -76,7 +77,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_OP_DBL_EQUAL
,
MP_TOKEN_OP_NOT_EQUAL
,
MP_TOKEN_DEL_PAREN_OPEN
,
// 6
4
MP_TOKEN_DEL_PAREN_OPEN
,
// 6
5
MP_TOKEN_DEL_PAREN_CLOSE
,
MP_TOKEN_DEL_BRACKET_OPEN
,
MP_TOKEN_DEL_BRACKET_CLOSE
,
...
...
@@ -86,7 +87,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_COLON
,
MP_TOKEN_DEL_PERIOD
,
MP_TOKEN_DEL_SEMICOLON
,
MP_TOKEN_DEL_AT
,
// 7
4
MP_TOKEN_DEL_AT
,
// 7
5
MP_TOKEN_DEL_EQUAL
,
MP_TOKEN_DEL_PLUS_EQUAL
,
MP_TOKEN_DEL_MINUS_EQUAL
,
...
...
@@ -96,7 +97,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_PERCENT_EQUAL
,
MP_TOKEN_DEL_AMPERSAND_EQUAL
,
MP_TOKEN_DEL_PIPE_EQUAL
,
MP_TOKEN_DEL_CARET_EQUAL
,
// 8
4
MP_TOKEN_DEL_CARET_EQUAL
,
// 8
5
MP_TOKEN_DEL_DBL_MORE_EQUAL
,
MP_TOKEN_DEL_DBL_LESS_EQUAL
,
MP_TOKEN_DEL_DBL_STAR_EQUAL
,
...
...
py/parse.c
View file @
69a818d4
...
...
@@ -88,6 +88,7 @@ typedef struct _parser_t {
uint
rule_stack_top
;
rule_stack_t
*
rule_stack
;
uint
result_stack_alloc
;
uint
result_stack_top
;
mp_parse_node_t
*
result_stack
;
}
parser_t
;
...
...
@@ -121,7 +122,7 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
int
num_parse_nodes_allocated
=
0
;
mp_parse_node_struct_t
*
parse_node_new_struct
(
int
rule_id
,
int
num_args
)
{
mp_parse_node_struct_t
*
pn
=
m_
malloc
(
sizeof
(
mp_parse_node_struct_t
)
+
num_args
*
sizeof
(
mp_parse_node_t
)
);
mp_parse_node_struct_t
*
pn
=
m_
new_obj_var
(
mp_parse_node_struct_t
,
mp_parse_node_t
,
num_args
);
pn
->
source
=
0
;
// TODO
pn
->
kind_num_nodes
=
(
rule_id
&
0xff
)
|
(
num_args
<<
8
);
num_parse_nodes_allocated
+=
1
;
...
...
@@ -180,6 +181,10 @@ static mp_parse_node_t peek_result(parser_t *parser, int pos) {
}
// Push the parse node pn onto the top of the parser's result stack.
// When the stack is already full (top index has reached the allocated
// count), the backing array is first renewed to twice its current size.
static void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    // grow the result stack if there is no free slot left
    if (!(parser->result_stack_top < parser->result_stack_alloc)) {
        parser->result_stack = m_renew(
            mp_parse_node_t,
            parser->result_stack,
            parser->result_stack_alloc,
            parser->result_stack_alloc * 2
        );
        parser->result_stack_alloc += parser->result_stack_alloc;
    }

    // store the node in the next free slot, then advance the top index
    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}
...
...
@@ -252,14 +257,20 @@ static void push_result_rule(parser_t *parser, const rule_t *rule, int num_args)
}
mp_parse_node_t
mp_parse
(
mp_lexer_t
*
lex
,
mp_parse_input_kind_t
input_kind
)
{
parser_t
*
parser
=
m_new
(
parser_t
,
1
);
// allocate memory for the parser and its stacks
parser_t
*
parser
=
m_new_obj
(
parser_t
);
parser
->
rule_stack_alloc
=
64
;
parser
->
rule_stack_top
=
0
;
parser
->
rule_stack
=
m_new
(
rule_stack_t
,
parser
->
rule_stack_alloc
);
parser
->
result_stack
=
m_new
(
mp_parse_node_t
,
1000
)
;
parser
->
result_stack
_alloc
=
64
;
parser
->
result_stack_top
=
0
;
parser
->
result_stack
=
m_new
(
mp_parse_node_t
,
parser
->
result_stack_alloc
);
// work out the top-level rule to use, and push it on the stack
int
top_level_rule
;
switch
(
input_kind
)
{
case
MP_PARSE_SINGLE_INPUT
:
top_level_rule
=
RULE_single_input
;
break
;
...
...
@@ -268,6 +279,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
}
push_rule
(
parser
,
rules
[
top_level_rule
],
0
);
// parse!
uint
n
,
i
;
bool
backtrack
=
false
;
const
rule_t
*
rule
;
...
...
@@ -558,12 +571,25 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
//printf("--------------\n");
//result_stack_show(parser);
assert
(
parser
->
r
es
ul
t
_stack_
top
==
1
);
//printf("maximum depth: %d\n", parser->rule_stack_alloc);
//printf("rule stack alloc: %d\n", parser->rule_stack_alloc);
//printf("result stack alloc: %d\n", parser->result_stack_alloc);
//printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
return
parser
->
result_stack
[
0
];
// get the root parse node that we created
assert
(
parser
->
result_stack_top
==
1
);
mp_parse_node_t
result
=
parser
->
result_stack
[
0
];
finished:
// free the memory that we don't need anymore
m_del
(
rule_stack_t
,
parser
->
rule_stack
,
parser
->
rule_stack_alloc
);
m_del
(
mp_parse_node_t
,
parser
->
result_stack
,
parser
->
result_stack_alloc
);
m_del_obj
(
parser_t
,
parser
);
// return the result
return
result
;
syntax_error:
// TODO these should raise a proper exception
if
(
mp_lexer_is_kind
(
lex
,
MP_TOKEN_INDENT
))
{
mp_lexer_show_error_pythonic
(
lex
,
"IndentationError: unexpected indent"
);
}
else
if
(
mp_lexer_is_kind
(
lex
,
MP_TOKEN_DEDENT_MISMATCH
))
{
...
...
@@ -575,5 +601,6 @@ syntax_error:
#endif
mp_token_show
(
mp_lexer_cur
(
lex
));
}
return
MP_PARSE_NODE_NULL
;
result
=
MP_PARSE_NODE_NULL
;
goto
finished
;
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment