Commit 16677ce3 authored by Damien George
Browse files

py: Be more precise about unicode type and disabled unicode behaviour.

parent 0ecd5988
...@@ -492,11 +492,19 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) { ...@@ -492,11 +492,19 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
} }
} }
if (c != MP_LEXER_EOF) { if (c != MP_LEXER_EOF) {
#if MICROPY_PY_BUILTINS_STR_UNICODE
if (c < 0x110000 && !is_bytes) { if (c < 0x110000 && !is_bytes) {
vstr_add_char(&lex->vstr, c); vstr_add_char(&lex->vstr, c);
} else if (c < 0x100 && is_bytes) { } else if (c < 0x100 && is_bytes) {
vstr_add_byte(&lex->vstr, c); vstr_add_byte(&lex->vstr, c);
} else { }
#else
// without unicode everything is just added as an 8-bit byte
if (c < 0x100) {
vstr_add_byte(&lex->vstr, c);
}
#endif
else {
assert(!"TODO: Throw an error, invalid escape code probably"); assert(!"TODO: Throw an error, invalid escape code probably");
} }
} }
......
...@@ -92,7 +92,15 @@ size_t m_get_peak_bytes_allocated(void); ...@@ -92,7 +92,15 @@ size_t m_get_peak_bytes_allocated(void);
/** unichar / UTF-8 *********************************************/ /** unichar / UTF-8 *********************************************/
typedef int unichar; // TODO #if MICROPY_PY_BUILTINS_STR_UNICODE
#include <stdint.h> // only include if we need it
// with unicode enabled we need a type which can fit chars up to 0x10ffff
typedef uint32_t unichar;
#else
// without unicode enabled we only need to fit chars up to 0xff
// (on 16-bit archs uint is 16 bits and more efficient than uint32_t)
typedef uint unichar;
#endif
unichar utf8_get_char(const byte *s); unichar utf8_get_char(const byte *s);
const byte *utf8_next_char(const byte *s); const byte *utf8_next_char(const byte *s);
......
...@@ -182,11 +182,11 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) { ...@@ -182,11 +182,11 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
return mp_obj_new_str(str, len, true); return mp_obj_new_str(str, len, true);
#else #else
mp_int_t ord = mp_obj_get_int(o_in); mp_int_t ord = mp_obj_get_int(o_in);
if (0 <= ord && ord <= 0x10ffff) { if (0 <= ord && ord <= 0xff) {
char str[1] = {ord}; char str[1] = {ord};
return mp_obj_new_str(str, 1, true); return mp_obj_new_str(str, 1, true);
} else { } else {
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)")); nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(256)"));
} }
#endif #endif
} }
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment