Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
TASTE
uPython-mirror
Commits
b1b84055
Commit
b1b84055
authored
Jun 28, 2014
by
Damien George
Browse files
Merge branch 'unicode'
parents
8993fb6c
635b60e2
Changes
22
Show whitespace changes
Inline
Side-by-side
py/builtin.c
View file @
b1b84055
...
@@ -172,13 +172,40 @@ STATIC mp_obj_t mp_builtin_callable(mp_obj_t o_in) {
...
@@ -172,13 +172,40 @@ STATIC mp_obj_t mp_builtin_callable(mp_obj_t o_in) {
MP_DEFINE_CONST_FUN_OBJ_1
(
mp_builtin_callable_obj
,
mp_builtin_callable
);
MP_DEFINE_CONST_FUN_OBJ_1
(
mp_builtin_callable_obj
,
mp_builtin_callable
);
STATIC
mp_obj_t
mp_builtin_chr
(
mp_obj_t
o_in
)
{
STATIC
mp_obj_t
mp_builtin_chr
(
mp_obj_t
o_in
)
{
int
ord
=
mp_obj_get_int
(
o_in
);
#if MICROPY_PY_BUILTINS_STR_UNICODE
machine_int_t
c
=
mp_obj_get_int
(
o_in
);
char
str
[
4
];
int
len
=
0
;
if
(
c
<
0x80
)
{
*
str
=
c
;
len
=
1
;
}
else
if
(
c
<
0x800
)
{
str
[
0
]
=
(
c
>>
6
)
|
0xC0
;
str
[
1
]
=
(
c
&
0x3F
)
|
0x80
;
len
=
2
;
}
else
if
(
c
<
0x10000
)
{
str
[
0
]
=
(
c
>>
12
)
|
0xE0
;
str
[
1
]
=
((
c
>>
6
)
&
0x3F
)
|
0x80
;
str
[
2
]
=
(
c
&
0x3F
)
|
0x80
;
len
=
3
;
}
else
if
(
c
<
0x110000
)
{
str
[
0
]
=
(
c
>>
18
)
|
0xF0
;
str
[
1
]
=
((
c
>>
12
)
&
0x3F
)
|
0x80
;
str
[
2
]
=
((
c
>>
6
)
&
0x3F
)
|
0x80
;
str
[
3
]
=
(
c
&
0x3F
)
|
0x80
;
len
=
4
;
}
else
{
nlr_raise
(
mp_obj_new_exception_msg
(
&
mp_type_ValueError
,
"chr() arg not in range(0x110000)"
));
}
return
mp_obj_new_str
(
str
,
len
,
true
);
#else
machine_int_t
ord
=
mp_obj_get_int
(
o_in
);
if
(
0
<=
ord
&&
ord
<=
0x10ffff
)
{
if
(
0
<=
ord
&&
ord
<=
0x10ffff
)
{
char
str
[
1
]
=
{
ord
};
char
str
[
1
]
=
{
ord
};
return
mp_obj_new_str
(
str
,
1
,
true
);
return
mp_obj_new_str
(
str
,
1
,
true
);
}
else
{
}
else
{
nlr_raise
(
mp_obj_new_exception_msg
(
&
mp_type_ValueError
,
"chr() arg not in range(0x110000)"
));
nlr_raise
(
mp_obj_new_exception_msg
(
&
mp_type_ValueError
,
"chr() arg not in range(0x110000)"
));
}
}
#endif
}
}
MP_DEFINE_CONST_FUN_OBJ_1
(
mp_builtin_chr_obj
,
mp_builtin_chr
);
MP_DEFINE_CONST_FUN_OBJ_1
(
mp_builtin_chr_obj
,
mp_builtin_chr
);
...
@@ -344,13 +371,32 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct);
...
@@ -344,13 +371,32 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_oct_obj, mp_builtin_oct);
STATIC
mp_obj_t
mp_builtin_ord
(
mp_obj_t
o_in
)
{
STATIC
mp_obj_t
mp_builtin_ord
(
mp_obj_t
o_in
)
{
uint
len
;
uint
len
;
const
char
*
str
=
mp_obj_str_get_data
(
o_in
,
&
len
);
const
char
*
str
=
mp_obj_str_get_data
(
o_in
,
&
len
);
#if MICROPY_PY_BUILTINS_STR_UNICODE
uint
charlen
=
unichar_charlen
(
str
,
len
);
if
(
charlen
==
1
)
{
if
(
MP_OBJ_IS_STR
(
o_in
)
&&
UTF8_IS_NONASCII
(
*
str
))
{
machine_int_t
ord
=
*
str
++
&
0x7F
;
for
(
machine_int_t
mask
=
0x40
;
ord
&
mask
;
mask
>>=
1
)
{
ord
&=
~
mask
;
}
while
(
UTF8_IS_CONT
(
*
str
))
{
ord
=
(
ord
<<
6
)
|
(
*
str
++
&
0x3F
);
}
return
mp_obj_new_int
(
ord
);
}
else
{
return
mp_obj_new_int
(((
const
byte
*
)
str
)[
0
]);
}
}
else
{
nlr_raise
(
mp_obj_new_exception_msg_varg
(
&
mp_type_TypeError
,
"ord() expected a character, but string of length %d found"
,
charlen
));
}
#else
if
(
len
==
1
)
{
if
(
len
==
1
)
{
// don't sign extend when converting to ord
// don't sign extend when converting to ord
// TODO unicode
return
mp_obj_new_int
(((
const
byte
*
)
str
)[
0
]);
return
mp_obj_new_int
(((
const
byte
*
)
str
)[
0
]);
}
else
{
}
else
{
nlr_raise
(
mp_obj_new_exception_msg_varg
(
&
mp_type_TypeError
,
"ord() expected a character, but string of length %d found"
,
len
));
nlr_raise
(
mp_obj_new_exception_msg_varg
(
&
mp_type_TypeError
,
"ord() expected a character, but string of length %d found"
,
len
));
}
}
#endif
}
}
MP_DEFINE_CONST_FUN_OBJ_1
(
mp_builtin_ord_obj
,
mp_builtin_ord
);
MP_DEFINE_CONST_FUN_OBJ_1
(
mp_builtin_ord_obj
,
mp_builtin_ord
);
...
...
py/lexer.c
View file @
b1b84055
...
@@ -502,19 +502,32 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
...
@@ -502,19 +502,32 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
case
'v'
:
c
=
0x0b
;
break
;
case
'v'
:
c
=
0x0b
;
break
;
case
'f'
:
c
=
0x0c
;
break
;
case
'f'
:
c
=
0x0c
;
break
;
case
'r'
:
c
=
0x0d
;
break
;
case
'r'
:
c
=
0x0d
;
break
;
case
'u'
:
case
'U'
:
if
(
is_bytes
)
{
// b'\u1234' == b'\\u1234'
vstr_add_char
(
&
lex
->
vstr
,
'\\'
);
break
;
}
// Otherwise fall through.
case
'x'
:
case
'x'
:
{
{
uint
num
=
0
;
uint
num
=
0
;
if
(
!
get_hex
(
lex
,
2
,
&
num
))
{
if
(
!
get_hex
(
lex
,
(
c
==
'x'
?
2
:
c
==
'u'
?
4
:
8
)
,
&
num
))
{
// TODO error message
// TODO error message
assert
(
0
);
assert
(
0
);
}
}
c
=
num
;
c
=
num
;
break
;
break
;
}
}
case
'N'
:
break
;
// TODO \N{name} only in strings
case
'N'
:
case
'u'
:
break
;
// TODO \uxxxx only in strings
// Supporting '\N{LATIN SMALL LETTER A}' == 'a' would require keeping the
case
'U'
:
break
;
// TODO \Uxxxxxxxx only in strings
// entire Unicode name table in the core. As of Unicode 6.3.0, that's nearly
// 3MB of text; even gzip-compressed and with minimal structure, it'll take
// roughly half a meg of storage. This form of Unicode escape may be added
// later on, but it's definitely not a priority right now. -- CJA 20140607
assert
(
!
"Unicode name escapes not supported"
);
break
;
default:
default:
if
(
c
>=
'0'
&&
c
<=
'7'
)
{
if
(
c
>=
'0'
&&
c
<=
'7'
)
{
// Octal sequence, 1-3 chars
// Octal sequence, 1-3 chars
...
@@ -533,7 +546,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
...
@@ -533,7 +546,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
}
}
}
if
(
c
!=
MP_LEXER_CHAR_EOF
)
{
if
(
c
!=
MP_LEXER_CHAR_EOF
)
{
if
(
c
<
0x110000
&&
!
is_bytes
)
{
vstr_add_char
(
&
lex
->
vstr
,
c
);
vstr_add_char
(
&
lex
->
vstr
,
c
);
}
else
if
(
c
<
0x100
&&
is_bytes
)
{
vstr_add_byte
(
&
lex
->
vstr
,
c
);
}
else
{
assert
(
!
"TODO: Throw an error, invalid escape code probably"
);
}
}
}
}
else
{
}
else
{
vstr_add_char
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
vstr_add_char
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
...
...
py/misc.h
View file @
b1b84055
...
@@ -100,7 +100,9 @@ bool unichar_isupper(unichar c);
...
@@ -100,7 +100,9 @@ bool unichar_isupper(unichar c);
bool
unichar_islower
(
unichar
c
);
bool
unichar_islower
(
unichar
c
);
unichar
unichar_tolower
(
unichar
c
);
unichar
unichar_tolower
(
unichar
c
);
unichar
unichar_toupper
(
unichar
c
);
unichar
unichar_toupper
(
unichar
c
);
#define unichar_charlen(s, bytelen) (bytelen)
uint
unichar_charlen
(
const
char
*
str
,
uint
len
);
// TODO this should return machine_uint_t
#define UTF8_IS_NONASCII(ch) ((ch) & 0x80)
#define UTF8_IS_CONT(ch) (((ch) & 0xC0) == 0x80)
/** variable string *********************************************/
/** variable string *********************************************/
...
@@ -164,4 +166,18 @@ int DEBUG_printf(const char *fmt, ...);
...
@@ -164,4 +166,18 @@ int DEBUG_printf(const char *fmt, ...);
extern
uint
mp_verbose_flag
;
extern
uint
mp_verbose_flag
;
// This is useful for unicode handling. Some CPU architectures have
// special instructions for an efficient implementation of this
// function (e.g. CLZ on ARM).
// NOTE: this function is unused at the moment
#ifndef count_lead_ones
// Count how many consecutive 1 bits `val` has, starting at its most
// significant bit (bit 7) and stopping at the first 0 bit.
// Returns a value in the range 0..8 (8 when val == 0xFF).
static inline uint count_lead_ones(byte val) {
    uint ones = 0;
    byte probe = 0x80;
    // Slide a single-bit probe from the top bit downwards; once the probe
    // has been shifted out entirely it is 0 and the test fails on its own.
    while (val & probe) {
        ones++;
        probe >>= 1;
    }
    return ones;
}
#endif
#endif // _INCLUDED_MINILIB_H
#endif // _INCLUDED_MINILIB_H
py/mpconfig.h
View file @
b1b84055
...
@@ -249,6 +249,11 @@ typedef double mp_float_t;
...
@@ -249,6 +249,11 @@ typedef double mp_float_t;
/*****************************************************************************/
/*****************************************************************************/
/* Fine control over Python builtins, classes, modules, etc */
/* Fine control over Python builtins, classes, modules, etc */
// Whether str object is proper unicode
#ifndef MICROPY_PY_BUILTINS_STR_UNICODE
#define MICROPY_PY_BUILTINS_STR_UNICODE (0)
#endif
// Whether to support bytearray object
// Whether to support bytearray object
#ifndef MICROPY_PY_BUILTINS_BYTEARRAY
#ifndef MICROPY_PY_BUILTINS_BYTEARRAY
#define MICROPY_PY_BUILTINS_BYTEARRAY (1)
#define MICROPY_PY_BUILTINS_BYTEARRAY (1)
...
...
py/obj.c
View file @
b1b84055
...
@@ -357,7 +357,12 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index,
...
@@ -357,7 +357,12 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index,
// may return MP_OBJ_NULL
// may return MP_OBJ_NULL
mp_obj_t
mp_obj_len_maybe
(
mp_obj_t
o_in
)
{
mp_obj_t
mp_obj_len_maybe
(
mp_obj_t
o_in
)
{
if
(
MP_OBJ_IS_STR
(
o_in
)
||
MP_OBJ_IS_TYPE
(
o_in
,
&
mp_type_bytes
))
{
if
(
#if !MICROPY_PY_BUILTINS_STR_UNICODE
// It's simple - unicode is slow, non-unicode is fast
MP_OBJ_IS_STR
(
o_in
)
||
#endif
MP_OBJ_IS_TYPE
(
o_in
,
&
mp_type_bytes
))
{
return
MP_OBJ_NEW_SMALL_INT
((
machine_int_t
)
mp_obj_str_get_len
(
o_in
));
return
MP_OBJ_NEW_SMALL_INT
((
machine_int_t
)
mp_obj_str_get_len
(
o_in
));
}
else
{
}
else
{
mp_obj_type_t
*
type
=
mp_obj_get_type
(
o_in
);
mp_obj_type_t
*
type
=
mp_obj_get_type
(
o_in
);
...
...
py/objstr.c
View file @
b1b84055
...
@@ -32,6 +32,7 @@
...
@@ -32,6 +32,7 @@
#include "mpconfig.h"
#include "mpconfig.h"
#include "nlr.h"
#include "nlr.h"
#include "misc.h"
#include "misc.h"
#include "unicode.h"
#include "qstr.h"
#include "qstr.h"
#include "obj.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime0.h"
...
@@ -43,16 +44,7 @@
...
@@ -43,16 +44,7 @@
STATIC
mp_obj_t
str_modulo_format
(
mp_obj_t
pattern
,
uint
n_args
,
const
mp_obj_t
*
args
,
mp_obj_t
dict
);
STATIC
mp_obj_t
str_modulo_format
(
mp_obj_t
pattern
,
uint
n_args
,
const
mp_obj_t
*
args
,
mp_obj_t
dict
);
const
mp_obj_t
mp_const_empty_bytes
;
const
mp_obj_t
mp_const_empty_bytes
;
// use this macro to extract the string hash
mp_obj_t
mp_obj_new_str_iterator
(
mp_obj_t
str
);
#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
// use this macro to extract the string length
#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }
// use this macro to extract the string data and length
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
STATIC
mp_obj_t
mp_obj_new_str_iterator
(
mp_obj_t
str
);
STATIC
mp_obj_t
mp_obj_new_bytes_iterator
(
mp_obj_t
str
);
STATIC
mp_obj_t
mp_obj_new_bytes_iterator
(
mp_obj_t
str
);
STATIC
NORETURN
void
bad_implicit_conversion
(
mp_obj_t
self_in
);
STATIC
NORETURN
void
bad_implicit_conversion
(
mp_obj_t
self_in
);
STATIC
NORETURN
void
arg_type_mixup
();
STATIC
NORETURN
void
arg_type_mixup
();
...
@@ -259,7 +251,7 @@ STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, cons
...
@@ -259,7 +251,7 @@ STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, cons
return
NULL
;
return
NULL
;
}
}
STATIC
mp_obj_t
str_binary_op
(
int
op
,
mp_obj_t
lhs_in
,
mp_obj_t
rhs_in
)
{
mp_obj_t
mp_obj_
str_binary_op
(
int
op
,
mp_obj_t
lhs_in
,
mp_obj_t
rhs_in
)
{
GET_STR_DATA_LEN
(
lhs_in
,
lhs_data
,
lhs_len
);
GET_STR_DATA_LEN
(
lhs_in
,
lhs_data
,
lhs_len
);
mp_obj_type_t
*
lhs_type
=
mp_obj_get_type
(
lhs_in
);
mp_obj_type_t
*
lhs_type
=
mp_obj_get_type
(
lhs_in
);
mp_obj_type_t
*
rhs_type
=
mp_obj_get_type
(
rhs_in
);
mp_obj_type_t
*
rhs_type
=
mp_obj_get_type
(
rhs_in
);
...
@@ -352,11 +344,14 @@ uncomparable:
...
@@ -352,11 +344,14 @@ uncomparable:
return
MP_OBJ_NULL
;
// op not supported
return
MP_OBJ_NULL
;
// op not supported
}
}
#if !MICROPY_PY_BUILTINS_STR_UNICODE
// objstrunicode defines own version
const
byte
*
str_index_to_ptr
(
const
mp_obj_type_t
*
type
,
const
byte
*
self_data
,
uint
self_len
,
const
byte
*
str_index_to_ptr
(
const
mp_obj_type_t
*
type
,
const
byte
*
self_data
,
uint
self_len
,
mp_obj_t
index
,
bool
is_slice
)
{
mp_obj_t
index
,
bool
is_slice
)
{
machine_uint_t
index_val
=
mp_get_index
(
type
,
self_len
,
index
,
is_slice
);
machine_uint_t
index_val
=
mp_get_index
(
type
,
self_len
,
index
,
is_slice
);
return
self_data
+
index_val
;
return
self_data
+
index_val
;
}
}
#endif
STATIC
mp_obj_t
str_subscr
(
mp_obj_t
self_in
,
mp_obj_t
index
,
mp_obj_t
value
)
{
STATIC
mp_obj_t
str_subscr
(
mp_obj_t
self_in
,
mp_obj_t
index
,
mp_obj_t
value
)
{
mp_obj_type_t
*
type
=
mp_obj_get_type
(
self_in
);
mp_obj_type_t
*
type
=
mp_obj_get_type
(
self_in
);
...
@@ -571,7 +566,6 @@ STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
...
@@ -571,7 +566,6 @@ STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
return
res
;
return
res
;
}
}
STATIC
mp_obj_t
str_finder
(
uint
n_args
,
const
mp_obj_t
*
args
,
machine_int_t
direction
,
bool
is_index
)
{
STATIC
mp_obj_t
str_finder
(
uint
n_args
,
const
mp_obj_t
*
args
,
machine_int_t
direction
,
bool
is_index
)
{
const
mp_obj_type_t
*
self_type
=
mp_obj_get_type
(
args
[
0
]);
const
mp_obj_type_t
*
self_type
=
mp_obj_get_type
(
args
[
0
]);
assert
(
2
<=
n_args
&&
n_args
<=
4
);
assert
(
2
<=
n_args
&&
n_args
<=
4
);
...
@@ -600,6 +594,11 @@ STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t dire
...
@@ -600,6 +594,11 @@ STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t dire
}
}
}
else
{
}
else
{
// found
// found
#if MICROPY_PY_BUILTINS_STR_UNICODE
if
(
self_type
==
&
mp_type_str
)
{
return
MP_OBJ_NEW_SMALL_INT
(
utf8_ptr_to_index
(
haystack
,
p
));
}
#endif
return
MP_OBJ_NEW_SMALL_INT
(
p
-
haystack
);
return
MP_OBJ_NEW_SMALL_INT
(
p
-
haystack
);
}
}
}
}
...
@@ -1449,7 +1448,7 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
...
@@ -1449,7 +1448,7 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
// if needle_len is zero then we count each gap between characters as an occurrence
// if needle_len is zero then we count each gap between characters as an occurrence
if
(
needle_len
==
0
)
{
if
(
needle_len
==
0
)
{
return
MP_OBJ_NEW_SMALL_INT
(
unichar_charlen
((
const
char
*
)
start
,
end
-
start
)
+
1
);
return
MP_OBJ_NEW_SMALL_INT
(
(
machine_uint_t
)
unichar_charlen
((
const
char
*
)
start
,
end
-
start
)
+
1
);
}
}
// count the occurrences
// count the occurrences
...
@@ -1610,7 +1609,7 @@ STATIC mp_obj_t str_encode(uint n_args, const mp_obj_t *args) {
...
@@ -1610,7 +1609,7 @@ STATIC mp_obj_t str_encode(uint n_args, const mp_obj_t *args) {
}
}
#endif
#endif
STATIC
machine_int_t
str_get_buffer
(
mp_obj_t
self_in
,
mp_buffer_info_t
*
bufinfo
,
int
flags
)
{
machine_int_t
mp_obj_
str_get_buffer
(
mp_obj_t
self_in
,
mp_buffer_info_t
*
bufinfo
,
int
flags
)
{
if
(
flags
==
MP_BUFFER_READ
)
{
if
(
flags
==
MP_BUFFER_READ
)
{
GET_STR_DATA_LEN
(
self_in
,
str_data
,
str_len
);
GET_STR_DATA_LEN
(
self_in
,
str_data
,
str_len
);
bufinfo
->
buf
=
(
void
*
)
str_data
;
bufinfo
->
buf
=
(
void
*
)
str_data
;
...
@@ -1627,38 +1626,45 @@ STATIC machine_int_t str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo,
...
@@ -1627,38 +1626,45 @@ STATIC machine_int_t str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo,
}
}
#if MICROPY_CPYTHON_COMPAT
#if MICROPY_CPYTHON_COMPAT
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
bytes_decode_obj
,
1
,
3
,
bytes_decode
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
bytes_decode_obj
,
1
,
3
,
bytes_decode
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_encode_obj
,
1
,
3
,
str_encode
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_encode_obj
,
1
,
3
,
str_encode
);
#endif
#endif
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_find_obj
,
2
,
4
,
str_find
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_find_obj
,
2
,
4
,
str_find
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rfind_obj
,
2
,
4
,
str_rfind
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rfind_obj
,
2
,
4
,
str_rfind
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_index_obj
,
2
,
4
,
str_index
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_index_obj
,
2
,
4
,
str_index
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rindex_obj
,
2
,
4
,
str_rindex
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rindex_obj
,
2
,
4
,
str_rindex
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_2
(
str_join_obj
,
str_join
);
MP_DEFINE_CONST_FUN_OBJ_2
(
str_join_obj
,
str_join
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_split_obj
,
1
,
3
,
str_split
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_split_obj
,
1
,
3
,
str_split
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rsplit_obj
,
1
,
3
,
str_rsplit
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rsplit_obj
,
1
,
3
,
str_rsplit
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_startswith_obj
,
2
,
3
,
str_startswith
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_startswith_obj
,
2
,
3
,
str_startswith
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_endswith_obj
,
2
,
3
,
str_endswith
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_endswith_obj
,
2
,
3
,
str_endswith
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_strip_obj
,
1
,
2
,
str_strip
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_strip_obj
,
1
,
2
,
str_strip
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_lstrip_obj
,
1
,
2
,
str_lstrip
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_lstrip_obj
,
1
,
2
,
str_lstrip
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rstrip_obj
,
1
,
2
,
str_rstrip
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_rstrip_obj
,
1
,
2
,
str_rstrip
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR
(
str_format_obj
,
1
,
mp_obj_str_format
);
MP_DEFINE_CONST_FUN_OBJ_VAR
(
str_format_obj
,
1
,
mp_obj_str_format
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_replace_obj
,
3
,
4
,
str_replace
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_replace_obj
,
3
,
4
,
str_replace
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_count_obj
,
2
,
4
,
str_count
);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN
(
str_count_obj
,
2
,
4
,
str_count
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_2
(
str_partition_obj
,
str_partition
);
MP_DEFINE_CONST_FUN_OBJ_2
(
str_partition_obj
,
str_partition
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_2
(
str_rpartition_obj
,
str_rpartition
);
MP_DEFINE_CONST_FUN_OBJ_2
(
str_rpartition_obj
,
str_rpartition
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_lower_obj
,
str_lower
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_lower_obj
,
str_lower
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_upper_obj
,
str_upper
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_upper_obj
,
str_upper
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isspace_obj
,
str_isspace
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isspace_obj
,
str_isspace
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isalpha_obj
,
str_isalpha
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isalpha_obj
,
str_isalpha
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isdigit_obj
,
str_isdigit
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isdigit_obj
,
str_isdigit
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isupper_obj
,
str_isupper
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_isupper_obj
,
str_isupper
);
STATIC
MP_DEFINE_CONST_FUN_OBJ_1
(
str_islower_obj
,
str_islower
);
MP_DEFINE_CONST_FUN_OBJ_1
(
str_islower_obj
,
str_islower
);
STATIC
const
mp_map_elem_t
str_locals_dict_table
[]
=
{
STATIC
const
mp_map_elem_t
str_locals_dict_table
[]
=
{
#if MICROPY_CPYTHON_COMPAT
#if MICROPY_CPYTHON_COMPAT
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_decode
),
(
mp_obj_t
)
&
bytes_decode_obj
},
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_decode
),
(
mp_obj_t
)
&
bytes_decode_obj
},
#if !MICROPY_PY_BUILTINS_STR_UNICODE
// If we have separate unicode type, then here we have methods only
// for bytes type, and it should not have encode() methods. Otherwise,
// we have non-compliant-but-practical bytestring type, which shares
// method table with bytes, so they both have encode() and decode()
// methods (which should do type checking at runtime).
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_encode
),
(
mp_obj_t
)
&
str_encode_obj
},
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_encode
),
(
mp_obj_t
)
&
str_encode_obj
},
#endif
#endif
#endif
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_find
),
(
mp_obj_t
)
&
str_find_obj
},
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_find
),
(
mp_obj_t
)
&
str_find_obj
},
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_rfind
),
(
mp_obj_t
)
&
str_rfind_obj
},
{
MP_OBJ_NEW_QSTR
(
MP_QSTR_rfind
),
(
mp_obj_t
)
&
str_rfind_obj
},
...
@@ -1688,17 +1694,19 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
...
@@ -1688,17 +1694,19 @@ STATIC const mp_map_elem_t str_locals_dict_table[] = {
STATIC
MP_DEFINE_CONST_DICT
(
str_locals_dict
,
str_locals_dict_table
);
STATIC
MP_DEFINE_CONST_DICT
(
str_locals_dict
,
str_locals_dict_table
);
#if !MICROPY_PY_BUILTINS_STR_UNICODE
const
mp_obj_type_t
mp_type_str
=
{
const
mp_obj_type_t
mp_type_str
=
{
{
&
mp_type_type
},
{
&
mp_type_type
},
.
name
=
MP_QSTR_str
,
.
name
=
MP_QSTR_str
,
.
print
=
str_print
,
.
print
=
str_print
,
.
make_new
=
str_make_new
,
.
make_new
=
str_make_new
,
.
binary_op
=
str_binary_op
,
.
binary_op
=
mp_obj_
str_binary_op
,
.
subscr
=
str_subscr
,
.
subscr
=
str_subscr
,
.
getiter
=
mp_obj_new_str_iterator
,
.
getiter
=
mp_obj_new_str_iterator
,
.
buffer_p
=
{
.
get_buffer
=
str_get_buffer
},
.
buffer_p
=
{
.
get_buffer
=
mp_obj_
str_get_buffer
},
.
locals_dict
=
(
mp_obj_t
)
&
str_locals_dict
,
.
locals_dict
=
(
mp_obj_t
)
&
str_locals_dict
,
};
};
#endif
// Reuses most of methods from str
// Reuses most of methods from str
const
mp_obj_type_t
mp_type_bytes
=
{
const
mp_obj_type_t
mp_type_bytes
=
{
...
@@ -1706,10 +1714,10 @@ const mp_obj_type_t mp_type_bytes = {
...
@@ -1706,10 +1714,10 @@ const mp_obj_type_t mp_type_bytes = {
.
name
=
MP_QSTR_bytes
,
.
name
=
MP_QSTR_bytes
,
.
print
=
str_print
,
.
print
=
str_print
,
.
make_new
=
bytes_make_new
,
.
make_new
=
bytes_make_new
,
.
binary_op
=
str_binary_op
,
.
binary_op
=
mp_obj_
str_binary_op
,
.
subscr
=
str_subscr
,
.
subscr
=
str_subscr
,
.
getiter
=
mp_obj_new_bytes_iterator
,
.
getiter
=
mp_obj_new_bytes_iterator
,
.
buffer_p
=
{
.
get_buffer
=
str_get_buffer
},
.
buffer_p
=
{
.
get_buffer
=
mp_obj_
str_get_buffer
},
.
locals_dict
=
(
mp_obj_t
)
&
str_locals_dict
,
.
locals_dict
=
(
mp_obj_t
)
&
str_locals_dict
,
};
};
...
@@ -1866,6 +1874,7 @@ typedef struct _mp_obj_str_it_t {
...
@@ -1866,6 +1874,7 @@ typedef struct _mp_obj_str_it_t {
machine_uint_t
cur
;
machine_uint_t
cur
;
}
mp_obj_str_it_t
;
}
mp_obj_str_it_t
;
#if !MICROPY_PY_BUILTINS_STR_UNICODE
STATIC
mp_obj_t
str_it_iternext
(
mp_obj_t
self_in
)
{
STATIC
mp_obj_t
str_it_iternext
(
mp_obj_t
self_in
)
{
mp_obj_str_it_t
*
self
=
self_in
;
mp_obj_str_it_t
*
self
=
self_in
;
GET_STR_DATA_LEN
(
self
->
str
,
str
,
len
);
GET_STR_DATA_LEN
(
self
->
str
,
str
,
len
);
...
@@ -1885,6 +1894,15 @@ STATIC const mp_obj_type_t mp_type_str_it = {
...
@@ -1885,6 +1894,15 @@ STATIC const mp_obj_type_t mp_type_str_it = {
.
iternext
=
str_it_iternext
,
.
iternext
=
str_it_iternext
,
};
};
mp_obj_t
mp_obj_new_str_iterator
(
mp_obj_t
str
)
{
mp_obj_str_it_t
*
o
=
m_new_obj
(
mp_obj_str_it_t
);
o
->
base
.
type
=
&
mp_type_str_it
;
o
->
str
=
str
;
o
->
cur
=
0
;
return
o
;
}
#endif
STATIC
mp_obj_t
bytes_it_iternext
(
mp_obj_t
self_in
)
{
STATIC
mp_obj_t
bytes_it_iternext
(
mp_obj_t
self_in
)
{
mp_obj_str_it_t
*
self
=
self_in
;
mp_obj_str_it_t
*
self
=
self_in
;
GET_STR_DATA_LEN
(
self
->
str
,
str
,
len
);
GET_STR_DATA_LEN
(
self
->
str
,
str
,
len
);
...
@@ -1904,14 +1922,6 @@ STATIC const mp_obj_type_t mp_type_bytes_it = {
...
@@ -1904,14 +1922,6 @@ STATIC const mp_obj_type_t mp_type_bytes_it = {
.
iternext
=
bytes_it_iternext
,
.
iternext
=
bytes_it_iternext
,
};
};
mp_obj_t
mp_obj_new_str_iterator
(
mp_obj_t
str
)
{
mp_obj_str_it_t
*
o
=
m_new_obj
(
mp_obj_str_it_t
);
o
->
base
.
type
=
&
mp_type_str_it
;
o
->
str
=
str
;
o
->
cur
=
0
;
return
o
;
}
mp_obj_t
mp_obj_new_bytes_iterator
(
mp_obj_t
str
)
{
mp_obj_t
mp_obj_new_bytes_iterator
(
mp_obj_t
str
)
{
mp_obj_str_it_t
*
o
=
m_new_obj
(
mp_obj_str_it_t
);
mp_obj_str_it_t
*
o
=
m_new_obj
(
mp_obj_str_it_t
);
o
->
base
.
type
=
&
mp_type_bytes_it
;
o
->
base
.
type
=
&
mp_type_bytes_it
;
...
...
py/objstr.h
View file @
b1b84055
...
@@ -35,5 +35,53 @@ typedef struct _mp_obj_str_t {
...
@@ -35,5 +35,53 @@ typedef struct _mp_obj_str_t {
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte*)str};
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte*)str};
// use this macro to extract the string hash
#define GET_STR_HASH(str_obj_in, str_hash) \
uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) \
{ str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
// use this macro to extract the string length
#define GET_STR_LEN(str_obj_in, str_len) \
uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) \
{ str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }
// use this macro to extract the string data and length
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) \
{ str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } \
else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
mp_obj_t
mp_obj_str_format
(
uint
n_args
,
const
mp_obj_t
*
args
);
mp_obj_t
mp_obj_str_format
(
uint
n_args
,
const
mp_obj_t
*
args
);
mp_obj_t
mp_obj_new_str_of_type
(
const
mp_obj_type_t
*
type
,
const
byte
*
data
,
uint
len
);
mp_obj_t
mp_obj_new_str_of_type
(
const
mp_obj_type_t
*
type
,
const
byte
*
data
,
uint
len
);
mp_obj_t
mp_obj_str_binary_op
(
int
op
,
mp_obj_t
lhs_in
,
mp_obj_t
rhs_in
);
machine_int_t
mp_obj_str_get_buffer
(
mp_obj_t
self_in
,
mp_buffer_info_t
*
bufinfo
,
int
flags
);
const
byte
*
str_index_to_ptr
(
const
mp_obj_type_t
*
type
,
const
byte
*
self_data
,
uint
self_len
,
mp_obj_t
index
,
bool
is_slice
);
MP_DECLARE_CONST_FUN_OBJ
(
str_encode_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_find_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_rfind_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_index_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_rindex_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_join_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_split_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_rsplit_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_startswith_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_endswith_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_strip_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_lstrip_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_rstrip_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_format_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_replace_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_count_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_partition_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_rpartition_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_lower_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_upper_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_isspace_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_isalpha_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_isdigit_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_isupper_obj
);
MP_DECLARE_CONST_FUN_OBJ
(
str_islower_obj
);
py/objstrunicode.c
0 → 100644
View file @
b1b84055
/*
* This file is part of the Micro Python project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2013, 2014 Damien P. George
* Copyright (c) 2014 Paul Sokolovsky