Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
TASTE
uPython-mirror
Commits
2ba2299d
Commit
2ba2299d
authored
Jun 04, 2014
by
Chris Angelico
Committed by
Paul Sokolovsky
Jun 27, 2014
Browse files
lexer, vstr: Add unicode support.
parent
1e3781bc
Changes
2
Hide whitespace changes
Inline
Side-by-side
py/lexer.c
View file @
2ba2299d
...
@@ -502,19 +502,32 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
...
@@ -502,19 +502,32 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
case
'v'
:
c
=
0x0b
;
break
;
case
'v'
:
c
=
0x0b
;
break
;
case
'f'
:
c
=
0x0c
;
break
;
case
'f'
:
c
=
0x0c
;
break
;
case
'r'
:
c
=
0x0d
;
break
;
case
'r'
:
c
=
0x0d
;
break
;
case
'u'
:
case
'U'
:
if
(
is_bytes
)
{
// b'\u1234' == b'\\u1234'
vstr_add_char
(
&
lex
->
vstr
,
'\\'
);
break
;
}
// Otherwise fall through.
case
'x'
:
case
'x'
:
{
{
uint
num
=
0
;
uint
num
=
0
;
if
(
!
get_hex
(
lex
,
2
,
&
num
))
{
if
(
!
get_hex
(
lex
,
(
c
==
'x'
?
2
:
c
==
'u'
?
4
:
8
)
,
&
num
))
{
// TODO error message
// TODO error message
assert
(
0
);
assert
(
0
);
}
}
c
=
num
;
c
=
num
;
break
;
break
;
}
}
case
'N'
:
break
;
// TODO \N{name} only in strings
case
'N'
:
case
'u'
:
break
;
// TODO \uxxxx only in strings
// Supporting '\N{LATIN SMALL LETTER A}' == 'a' would require keeping the
case
'U'
:
break
;
// TODO \Uxxxxxxxx only in strings
// entire Unicode name table in the core. As of Unicode 6.3.0, that's nearly
// 3MB of text; even gzip-compressed and with minimal structure, it'll take
// roughly half a meg of storage. This form of Unicode escape may be added
// later on, but it's definitely not a priority right now. -- CJA 20140607
assert
(
!
"Unicode name escapes not supported"
);
break
;
default:
default:
if
(
c
>=
'0'
&&
c
<=
'7'
)
{
if
(
c
>=
'0'
&&
c
<=
'7'
)
{
// Octal sequence, 1-3 chars
// Octal sequence, 1-3 chars
...
@@ -533,7 +546,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
...
@@ -533,7 +546,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
}
}
}
if
(
c
!=
MP_LEXER_CHAR_EOF
)
{
if
(
c
!=
MP_LEXER_CHAR_EOF
)
{
vstr_add_char
(
&
lex
->
vstr
,
c
);
if
(
c
<
0x110000
&&
!
is_bytes
)
{
vstr_add_char
(
&
lex
->
vstr
,
c
);
}
else
if
(
c
<
0x100
&&
is_bytes
)
{
vstr_add_byte
(
&
lex
->
vstr
,
c
);
}
else
{
assert
(
!
"TODO: Throw an error, invalid escape code probably"
);
}
}
}
}
else
{
}
else
{
vstr_add_char
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
vstr_add_char
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
...
...
py/vstr.c
View file @
2ba2299d
...
@@ -199,12 +199,40 @@ void vstr_add_byte(vstr_t *vstr, byte b) {
...
@@ -199,12 +199,40 @@ void vstr_add_byte(vstr_t *vstr, byte b) {
}
}
// Append the code point `c` to `vstr`, encoded as UTF-8.
//
// The encoded length is chosen from the value of `c`:
//   c < 0x80      -> 1 byte
//   c < 0x800     -> 2 bytes
//   c < 0x10000   -> 3 bytes
//   otherwise     -> 4 bytes (asserts c < 0x110000)
//
// Space is reserved with a single vstr_add_len() call; if that call returns
// NULL the function returns without writing anything.
// NOTE(review): surrogate code points (0xD800-0xDFFF) are not rejected here;
// confirm whether callers are expected to filter them.
void vstr_add_char(vstr_t *vstr, unichar c) {
    // Work out how many bytes the UTF-8 form of c occupies.
    int len;
    if (c < 0x80) {
        len = 1;
    } else if (c < 0x800) {
        len = 2;
    } else if (c < 0x10000) {
        len = 3;
    } else {
        // Checked before reserving space, so a failed assert leaves vstr untouched.
        assert(c < 0x110000);
        len = 4;
    }

    // Reserve exactly `len` bytes at the end of the string.
    byte *buf = (byte*)vstr_add_len(vstr, len);
    if (buf == NULL) {
        return;
    }

    // Fill in the lead byte and any continuation bytes (10xxxxxx).
    switch (len) {
        case 1:
            buf[0] = (byte)c;
            break;
        case 2:
            buf[0] = (c >> 6) | 0xC0;
            buf[1] = (c & 0x3F) | 0x80;
            break;
        case 3:
            buf[0] = (c >> 12) | 0xE0;
            buf[1] = ((c >> 6) & 0x3F) | 0x80;
            buf[2] = (c & 0x3F) | 0x80;
            break;
        default:
            buf[0] = (c >> 18) | 0xF0;
            buf[1] = ((c >> 12) & 0x3F) | 0x80;
            buf[2] = ((c >> 6) & 0x3F) | 0x80;
            buf[3] = (c & 0x3F) | 0x80;
            break;
    }
}
void
vstr_add_str
(
vstr_t
*
vstr
,
const
char
*
str
)
{
void
vstr_add_str
(
vstr_t
*
vstr
,
const
char
*
str
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment