Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
TASTE
uPython-mirror
Commits
94fbe971
Commit
94fbe971
authored
Jul 30, 2014
by
Damien George
Browse files
py: Change lexer stream API to return bytes not chars.
Lexer is now 8-bit clean inside strings.
parent
07133415
Changes
5
Hide whitespace changes
Inline
Side-by-side
py/lexer.c
View file @
94fbe971
...
...
@@ -45,7 +45,7 @@
struct
_mp_lexer_t
{
qstr
source_name
;
// name of source
void
*
stream_data
;
// data for stream
mp_lexer_stream_next_
char
_t
stream_next_
char
;
// stream callback to get next char
mp_lexer_stream_next_
byte
_t
stream_next_
byte
;
// stream callback to get next byte
mp_lexer_stream_close_t
stream_close
;
// stream callback to free
unichar
chr0
,
chr1
,
chr2
;
// current cached characters from source
...
...
@@ -103,7 +103,7 @@ void mp_token_show(const mp_token_t *tok) {
#define CUR_CHAR(lex) ((lex)->chr0)
STATIC
bool
is_end
(
mp_lexer_t
*
lex
)
{
return
lex
->
chr0
==
MP_LEXER_
CHAR_
EOF
;
return
lex
->
chr0
==
MP_LEXER_EOF
;
}
STATIC
bool
is_physical_newline
(
mp_lexer_t
*
lex
)
{
...
...
@@ -171,7 +171,7 @@ STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
}
STATIC
void
next_char
(
mp_lexer_t
*
lex
)
{
if
(
lex
->
chr0
==
MP_LEXER_
CHAR_
EOF
)
{
if
(
lex
->
chr0
==
MP_LEXER_EOF
)
{
return
;
}
...
...
@@ -200,10 +200,10 @@ STATIC void next_char(mp_lexer_t *lex) {
for
(;
advance
>
0
;
advance
--
)
{
lex
->
chr0
=
lex
->
chr1
;
lex
->
chr1
=
lex
->
chr2
;
lex
->
chr2
=
lex
->
stream_next_
char
(
lex
->
stream_data
);
if
(
lex
->
chr2
==
MP_LEXER_
CHAR_
EOF
)
{
lex
->
chr2
=
lex
->
stream_next_
byte
(
lex
->
stream_data
);
if
(
lex
->
chr2
==
MP_LEXER_EOF
)
{
// EOF
if
(
lex
->
chr1
!=
MP_LEXER_
CHAR_
EOF
&&
lex
->
chr1
!=
'\n'
&&
lex
->
chr1
!=
'\r'
)
{
if
(
lex
->
chr1
!=
MP_LEXER_EOF
&&
lex
->
chr1
!=
'\n'
&&
lex
->
chr1
!=
'\r'
)
{
lex
->
chr2
=
'\n'
;
// insert newline at end of file
}
}
...
...
@@ -491,8 +491,8 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
vstr_add_char
(
&
lex
->
vstr
,
'\\'
);
}
else
{
switch
(
c
)
{
case
MP_LEXER_
CHAR_
EOF
:
break
;
// TODO a proper error message?
case
'\n'
:
c
=
MP_LEXER_
CHAR_
EOF
;
break
;
// TODO check this works correctly (we are supposed to ignore it)
case
MP_LEXER_EOF
:
break
;
// TODO a proper error message?
case
'\n'
:
c
=
MP_LEXER_EOF
;
break
;
// TODO check this works correctly (we are supposed to ignore it)
case
'\\'
:
break
;
case
'\''
:
break
;
case
'"'
:
break
;
...
...
@@ -546,7 +546,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
break
;
}
}
if
(
c
!=
MP_LEXER_
CHAR_
EOF
)
{
if
(
c
!=
MP_LEXER_EOF
)
{
if
(
c
<
0x110000
&&
!
is_bytes
)
{
vstr_add_char
(
&
lex
->
vstr
,
c
);
}
else
if
(
c
<
0x100
&&
is_bytes
)
{
...
...
@@ -556,7 +556,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
}
}
else
{
vstr_add_char
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
// Add the "character" as a byte so that we remain 8-bit clean.
// This way, strings are parsed correctly whether or not they contain utf-8 chars.
vstr_add_byte
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
}
}
next_char
(
lex
);
...
...
@@ -728,7 +730,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
}
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
char
_t
stream_next_
char
,
mp_lexer_stream_close_t
stream_close
)
{
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
byte
_t
stream_next_
byte
,
mp_lexer_stream_close_t
stream_close
)
{
mp_lexer_t
*
lex
=
m_new_maybe
(
mp_lexer_t
,
1
);
// check for memory allocation error
...
...
@@ -741,7 +743,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
lex
->
source_name
=
src_name
;
lex
->
stream_data
=
stream_data
;
lex
->
stream_next_
char
=
stream_next_
char
;
lex
->
stream_next_
byte
=
stream_next_
byte
;
lex
->
stream_close
=
stream_close
;
lex
->
line
=
1
;
lex
->
column
=
1
;
...
...
@@ -762,18 +764,18 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
lex
->
indent_level
[
0
]
=
0
;
// preload characters
lex
->
chr0
=
stream_next_
char
(
stream_data
);
lex
->
chr1
=
stream_next_
char
(
stream_data
);
lex
->
chr2
=
stream_next_
char
(
stream_data
);
lex
->
chr0
=
stream_next_
byte
(
stream_data
);
lex
->
chr1
=
stream_next_
byte
(
stream_data
);
lex
->
chr2
=
stream_next_
byte
(
stream_data
);
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
if
(
lex
->
chr0
==
MP_LEXER_
CHAR_
EOF
)
{
if
(
lex
->
chr0
==
MP_LEXER_EOF
)
{
lex
->
chr0
=
'\n'
;
}
else
if
(
lex
->
chr1
==
MP_LEXER_
CHAR_
EOF
)
{
}
else
if
(
lex
->
chr1
==
MP_LEXER_EOF
)
{
if
(
lex
->
chr0
!=
'\n'
&&
lex
->
chr0
!=
'\r'
)
{
lex
->
chr1
=
'\n'
;
}
}
else
if
(
lex
->
chr2
==
MP_LEXER_
CHAR_
EOF
)
{
}
else
if
(
lex
->
chr2
==
MP_LEXER_EOF
)
{
if
(
lex
->
chr1
!=
'\n'
&&
lex
->
chr1
!=
'\r'
)
{
lex
->
chr2
=
'\n'
;
}
...
...
py/lexer.h
View file @
94fbe971
...
...
@@ -139,18 +139,18 @@ typedef struct _mp_token_t {
mp_uint_t
len
;
// (byte) length of string of token
}
mp_token_t
;
// the next-char function must return the next character in the stream
// it must return MP_LEXER_CHAR_EOF if end of stream
// it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF
#define MP_LEXER_
CHAR_
EOF (-1)
typedef
unichar
(
*
mp_lexer_stream_next_
char
_t
)(
void
*
);
// the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
#define MP_LEXER_EOF (-1)
typedef
mp_uint_t
(
*
mp_lexer_stream_next_
byte
_t
)(
void
*
);
typedef
void
(
*
mp_lexer_stream_close_t
)(
void
*
);
typedef
struct
_mp_lexer_t
mp_lexer_t
;
void
mp_token_show
(
const
mp_token_t
*
tok
);
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
char
_t
stream_next_
char
,
mp_lexer_stream_close_t
stream_close
);
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
byte
_t
stream_next_
byte
,
mp_lexer_stream_close_t
stream_close
);
mp_lexer_t
*
mp_lexer_new_from_str_len
(
qstr
src_name
,
const
char
*
str
,
mp_uint_t
len
,
mp_uint_t
free_len
);
void
mp_lexer_free
(
mp_lexer_t
*
lex
);
...
...
py/lexerstr.c
View file @
94fbe971
...
...
@@ -36,11 +36,11 @@ typedef struct _mp_lexer_str_buf_t {
const
char
*
src_end
;
// end (exclusive) of source
}
mp_lexer_str_buf_t
;
STATIC
unichar
str_buf_next_
char
(
mp_lexer_str_buf_t
*
sb
)
{
STATIC
mp_uint_t
str_buf_next_
byte
(
mp_lexer_str_buf_t
*
sb
)
{
if
(
sb
->
src_cur
<
sb
->
src_end
)
{
return
*
sb
->
src_cur
++
;
}
else
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
}
...
...
@@ -57,5 +57,5 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t
sb
->
src_beg
=
str
;
sb
->
src_cur
=
str
;
sb
->
src_end
=
str
+
len
;
return
mp_lexer_new
(
src_name
,
sb
,
(
mp_lexer_stream_next_
char
_t
)
str_buf_next_
char
,
(
mp_lexer_stream_close_t
)
str_buf_free
);
return
mp_lexer_new
(
src_name
,
sb
,
(
mp_lexer_stream_next_
byte
_t
)
str_buf_next_
byte
,
(
mp_lexer_stream_close_t
)
str_buf_free
);
}
py/lexerunix.c
View file @
94fbe971
...
...
@@ -41,20 +41,20 @@
typedef
struct
_mp_lexer_file_buf_t
{
int
fd
;
char
buf
[
20
];
uint
len
;
uint
pos
;
byte
buf
[
20
];
mp_
uint
_t
len
;
mp_
uint
_t
pos
;
}
mp_lexer_file_buf_t
;
STATIC
unichar
file_buf_next_
char
(
mp_lexer_file_buf_t
*
fb
)
{
STATIC
mp_uint_t
file_buf_next_
byte
(
mp_lexer_file_buf_t
*
fb
)
{
if
(
fb
->
pos
>=
fb
->
len
)
{
if
(
fb
->
len
==
0
)
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
else
{
int
n
=
read
(
fb
->
fd
,
fb
->
buf
,
sizeof
(
fb
->
buf
));
if
(
n
<=
0
)
{
fb
->
len
=
0
;
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
fb
->
len
=
n
;
fb
->
pos
=
0
;
...
...
@@ -78,7 +78,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
int
n
=
read
(
fb
->
fd
,
fb
->
buf
,
sizeof
(
fb
->
buf
));
fb
->
len
=
n
;
fb
->
pos
=
0
;
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
char
_t
)
file_buf_next_
char
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
byte
_t
)
file_buf_next_
byte
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
}
#endif // MICROPY_HELPER_LEXER_UNIX
stmhal/lexerfatfs.c
View file @
94fbe971
...
...
@@ -36,20 +36,20 @@
typedef
struct
_mp_lexer_file_buf_t
{
FIL
fp
;
char
buf
[
20
];
byte
buf
[
20
];
uint16_t
len
;
uint16_t
pos
;
}
mp_lexer_file_buf_t
;
static
unichar
file_buf_next_
char
(
mp_lexer_file_buf_t
*
fb
)
{
STATIC
mp_uint_t
file_buf_next_
byte
(
mp_lexer_file_buf_t
*
fb
)
{
if
(
fb
->
pos
>=
fb
->
len
)
{
if
(
fb
->
len
<
sizeof
(
fb
->
buf
))
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
else
{
UINT
n
;
f_read
(
&
fb
->
fp
,
fb
->
buf
,
sizeof
(
fb
->
buf
),
&
n
);
if
(
n
==
0
)
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
fb
->
len
=
n
;
fb
->
pos
=
0
;
...
...
@@ -58,7 +58,7 @@ static unichar file_buf_next_char(mp_lexer_file_buf_t *fb) {
return
fb
->
buf
[
fb
->
pos
++
];
}
static
void
file_buf_close
(
mp_lexer_file_buf_t
*
fb
)
{
STATIC
void
file_buf_close
(
mp_lexer_file_buf_t
*
fb
)
{
f_close
(
&
fb
->
fp
);
m_del_obj
(
mp_lexer_file_buf_t
,
fb
);
}
...
...
@@ -74,5 +74,5 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
f_read
(
&
fb
->
fp
,
fb
->
buf
,
sizeof
(
fb
->
buf
),
&
n
);
fb
->
len
=
n
;
fb
->
pos
=
0
;
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
char
_t
)
file_buf_next_
char
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
byte
_t
)
file_buf_next_
byte
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment