Commit 17f45d41 authored by Rachel Dowdall
Browse files

Merge remote-tracking branch 'upstream/master'

parents 300c8bd4 da8d21e0
......@@ -375,28 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k
MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
// Builtin str(o): render o_in with PRINT_STR into a temporary vstr and build
// a str object from the rendered bytes.
STATIC mp_obj_t mp_builtin_str(mp_obj_t o_in) {
    vstr_t *rendered = vstr_new();

    // vstr_printf is handed over through the generic printf-style callback
    // signature that mp_obj_print_helper takes.
    void (*emit)(void*, const char*, ...) = (void (*)(void*, const char*, ...))vstr_printf;
    mp_obj_print_helper(emit, rendered, o_in, PRINT_STR);

    mp_obj_t result = mp_obj_new_str((byte*)rendered->buf, rendered->len, false);

    // the temporary buffer is released before the new object is returned
    vstr_free(rendered);
    return result;
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
// TODO: bytes should be a proper type; this function is only a quick
// CPython-compatibility hack.
STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) {
    // Only a qstr or a str object is accepted as the first argument; anything
    // else trips the assertion (which is compiled out when NDEBUG is defined).
    if (!(MP_OBJ_IS_QSTR(args[0]) || MP_OBJ_IS_TYPE(args[0], &str_type))) {
        assert(0);
    }

    // MicroPython strings are currently a mix of CPython's byte and unicode
    // strings, so the conversion is a no-op: the argument is returned as-is.
    mp_obj_t unchanged = args[0];
    return unchanged;
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes);
// Builtin id(o): the identity is the object handle itself, reinterpreted as a
// machine integer and wrapped in an int object.
STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) {
    machine_int_t identity = (machine_int_t)o_in;
    return mp_obj_new_int(identity);
}
......
......@@ -46,9 +46,9 @@ MATH_FUN_1(fabs, fabs)
MATH_FUN_1(floor, floor) //TODO: delegate to x.__floor__() if x is not a float
MATH_FUN_2(fmod, fmod)
//MATH_FUN_1(frexp, frexp)
MATH_FUN_1(isfinite, isfinite)
MATH_FUN_1(isinf, isinf)
MATH_FUN_1(isnan, isnan)
//MATH_FUN_1(isfinite, isfinite)
//MATH_FUN_1(isinf, isinf)
//MATH_FUN_1(isnan, isnan)
MATH_FUN_1(trunc, trunc)
//TODO: factorial, fsum, frexp, ldexp, modf
......@@ -83,9 +83,9 @@ STATIC const mp_map_elem_t mp_module_math_globals_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_floor), (mp_obj_t)&mp_math_floor_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_fmod), (mp_obj_t)&mp_math_fmod_obj },
//{ MP_OBJ_NEW_QSTR(MP_QSTR_frexp), (mp_obj_t)&mp_math_frexp_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_isfinite), (mp_obj_t)&mp_math_isfinite_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_isinf), (mp_obj_t)&mp_math_isinf_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_isnan), (mp_obj_t)&mp_math_isnan_obj },
//{ MP_OBJ_NEW_QSTR(MP_QSTR_isfinite), (mp_obj_t)&mp_math_isfinite_obj },
//{ MP_OBJ_NEW_QSTR(MP_QSTR_isinf), (mp_obj_t)&mp_math_isinf_obj },
//{ MP_OBJ_NEW_QSTR(MP_QSTR_isnan), (mp_obj_t)&mp_math_isnan_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_trunc), (mp_obj_t)&mp_math_trunc_obj },
};
......
......@@ -6,6 +6,7 @@
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "parsenum.h"
#include "runtime0.h"
#include "map.h"
......@@ -36,15 +37,20 @@ STATIC mp_obj_t complex_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const
return mp_obj_new_complex(0, 0);
case 1:
// TODO allow string as first arg and parse it
if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
if (MP_OBJ_IS_STR(args[0])) {
// a string, parse it
uint l;
const char *s = mp_obj_str_get_data(args[0], &l);
return mp_parse_num_decimal(s, l, true, true);
} else if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
// a complex, just return it
return args[0];
} else {
// something else, try to cast it to a complex
return mp_obj_new_complex(mp_obj_get_float(args[0]), 0);
}
case 2:
{
case 2: {
mp_float_t real, imag;
if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
mp_obj_complex_get(args[0], &real, &imag);
......
......@@ -39,10 +39,12 @@ STATIC mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
// a string, parse it
uint l;
const char *s = mp_obj_str_get_data(args[0], &l);
return mp_parse_num_decimal(s, l);
return mp_parse_num_decimal(s, l, false, false);
} else if (MP_OBJ_IS_TYPE(args[0], &mp_type_float)) {
// a float, just return it
return args[0];
} else {
// something else, try to cast it to a float
return mp_obj_new_float(mp_obj_get_float(args[0]));
}
......
......@@ -14,9 +14,11 @@ typedef struct _mp_obj_str_t {
mp_obj_base_t base;
machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
byte data[];
const byte *data;
} mp_obj_str_t;
const mp_obj_t mp_const_empty_bytes;
// Use this macro to extract the string hash.
// It declares a local `uint str_hash` in the enclosing scope and then fills it:
// via qstr_hash() when str_obj_in is a qstr, otherwise from the `hash` field of
// the mp_obj_str_t that str_obj_in points to.
// NOTE: the expansion is a declaration followed by an if/else, so it must be
// used as its own statement inside a block (never as the unbraced body of an
// `if`/`for`), and at most once per scope for a given str_hash name.
#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
......@@ -28,6 +30,7 @@ typedef struct _mp_obj_str_t {
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
/******************************************************************************/
/* str */
......@@ -78,6 +81,109 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env,
}
}
// Constructor for str, dispatching on the number of positional arguments:
//   0 args    -> the MP_QSTR_ qstr
//   1 arg     -> the argument rendered with PRINT_STR
//   2-3 args  -> first argument must be a bytes object; the new str object
//                reuses its data pointer and hash (2nd/3rd args are ignored)
//   otherwise -> TypeError
// type_in and n_kw are not consulted.
STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    if (n_args == 1) {
        // print the object into a scratch vstr, then turn that into a str
        vstr_t *rendered = vstr_new();
        mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, rendered, args[0], PRINT_STR);
        mp_obj_t text = mp_obj_new_str((byte*)rendered->buf, rendered->len, false);
        vstr_free(rendered);
        return text;
    }

    if (n_args == 2 || n_args == 3) {
        // TODO: validate 2nd/3rd args
        if (!MP_OBJ_IS_TYPE(args[0], &bytes_type)) {
            nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
        }

        GET_STR_DATA_LEN(args[0], src_data, src_len);
        GET_STR_HASH(args[0], src_hash);

        // str_new() with NULL data only sets type and length; the data pointer
        // and hash are borrowed from the source bytes object.
        mp_obj_str_t *result = str_new(&str_type, NULL, src_len);
        result->data = src_data;
        result->hash = src_hash;
        return result;
    }

    nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
}
// Constructor for bytes.
//
// Accepted call forms (any other argument count raises TypeError
// "wrong number of arguments"):
//   bytes()                   -> the shared mp_const_empty_bytes object
//   bytes(str, arg2[, arg3])  -> bytes object that reuses the str's data
//                                pointer and hash (arg2/arg3 are not inspected)
//   bytes(small_int)          -> that many zero bytes; ValueError if negative
//   bytes(iterable)           -> one byte per item; every item must be a small
//                                int in range(0, 256), otherwise TypeError /
//                                ValueError is raised
//
// type_in and n_kw are not consulted.
STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return mp_const_empty_bytes;
    }

    if (MP_OBJ_IS_STR(args[0])) {
        if (n_args < 2 || n_args > 3) {
            goto wrong_args;
        }
        GET_STR_DATA_LEN(args[0], str_data, str_len);
        GET_STR_HASH(args[0], str_hash);
        // str_new() with NULL data only sets type and length; data and hash
        // are borrowed from the source string.
        mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len);
        o->data = str_data;
        o->hash = str_hash;
        return o;
    }

    if (n_args > 1) {
        goto wrong_args;
    }

    if (MP_OBJ_IS_SMALL_INT(args[0])) {
        // A negative count must be rejected before it is converted to an
        // unsigned length, otherwise it turns into a huge allocation + memset.
        machine_int_t count = MP_OBJ_SMALL_INT_VALUE(args[0]);
        if (count < 0) {
            nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "negative count"));
        }
        uint len = count;
        byte *data;
        mp_obj_t o = mp_obj_str_builder_start(&bytes_type, len, &data);
        memset(data, 0, len);
        return mp_obj_str_builder_end(o);
    }

    int len;
    byte *data;
    vstr_t *vstr = NULL;
    mp_obj_t o = NULL;

    // Try to create array of exact len if initializer len is known
    mp_obj_t len_in = mp_obj_len_maybe(args[0]);
    if (len_in == MP_OBJ_NULL) {
        len = -1;
        vstr = vstr_new();
    } else {
        len = MP_OBJ_SMALL_INT_VALUE(len_in);
        o = mp_obj_str_builder_start(&bytes_type, len, &data);
    }

    int n_stored = 0; // bytes written into the pre-sized buffer so far
    mp_obj_t iterable = rt_getiter(args[0]);
    mp_obj_t item;
    while ((item = rt_iternext(iterable)) != mp_const_stop_iteration) {
        // Decoding a non-small-int as a small int would yield garbage.
        if (!MP_OBJ_IS_SMALL_INT(item)) {
            nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                "'%s' object cannot be interpreted as an integer", mp_obj_get_type_str(item)));
        }
        machine_int_t value = MP_OBJ_SMALL_INT_VALUE(item);
        if (value < 0 || value > 255) {
            // do not silently truncate to the low 8 bits
            nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "bytes must be in range(0, 256)"));
        }

        if (len == -1) {
            vstr_add_char(vstr, value);
        } else {
            // The buffer was sized from the reported length; never write past
            // it even if the iterable yields more items than it announced.
            if (n_stored >= len) {
                nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "iterable longer than its reported length"));
            }
            data[n_stored++] = value;
        }
    }

    if (len == -1) {
        vstr_shrink(vstr);
        // TODO: Optimize, borrow buffer from vstr
        len = vstr_len(vstr);
        o = mp_obj_str_builder_start(&bytes_type, len, &data);
        memcpy(data, vstr_str(vstr), len);
        vstr_free(vstr);
    } else if (n_stored < len) {
        // Fewer items than announced: shrink the object to what was actually
        // written so no uninitialised bytes end up in the result.
        ((mp_obj_str_t*)o)->len = n_stored;
    }

    return mp_obj_str_builder_end(o);

wrong_args:
    nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
}
// like strstr but with specified length and allows \0 bytes
// TODO replace with something more efficient/standard
STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
......@@ -520,6 +626,62 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
return MP_OBJ_NEW_SMALL_INT(num_occurrences);
}
// Shared implementation of partition/rpartition.
//
// Looks for arg (the separator) inside self_in, stepping the candidate index
// by `direction` each iteration: a positive direction starts at index 0, any
// other value starts at the last index where the separator could still fit.
// Returns a 3-tuple (head, separator, tail).  If the separator is not found,
// the whole string is placed in slot 0 (positive direction) or slot 2
// (otherwise) and the remaining slots hold the MP_QSTR_ qstr.
//
// Raises TypeError if arg is not a string and ValueError if it is empty.
STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t direction) {
    assert(MP_OBJ_IS_STR(self_in));
    if (!MP_OBJ_IS_STR(arg)) {
        nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
            "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
    }

    GET_STR_DATA_LEN(self_in, hay, hay_len);
    GET_STR_DATA_LEN(arg, needle, needle_len);

    if (needle_len == 0) {
        nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
    }

    // "separator not found" answer; overwritten below on a match
    mp_obj_t parts[3];
    parts[0] = (direction > 0) ? self_in : MP_OBJ_NEW_QSTR(MP_QSTR_);
    parts[1] = MP_OBJ_NEW_QSTR(MP_QSTR_);
    parts[2] = (direction > 0) ? MP_OBJ_NEW_QSTR(MP_QSTR_) : self_in;

    if (hay_len >= needle_len) {
        // last index at which the separator still fits inside the string
        machine_uint_t last_fit = hay_len - needle_len;
        machine_uint_t pos = (direction > 0) ? 0 : last_fit;
        machine_uint_t stop = (direction > 0) ? last_fit : 0;

        for (;;) {
            if (memcmp(hay + pos, needle, needle_len) == 0) {
                parts[0] = mp_obj_new_str(hay, pos, false);
                parts[1] = arg;
                parts[2] = mp_obj_new_str(hay + pos + needle_len, hay_len - pos - needle_len, false);
                break;
            }
            if (pos == stop) {
                // the final candidate position has been tested
                break;
            }
            pos += direction;
        }
    }

    return mp_obj_new_tuple(3, parts);
}
// str.partition(sep): delegate to str_partitioner with a forward scan
// (direction +1), i.e. starting from index 0.
STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
    const machine_int_t scan_forwards = 1;
    return str_partitioner(self_in, arg, scan_forwards);
}
// str.rpartition(sep): delegate to str_partitioner with a backward scan
// (direction -1), i.e. starting from the end of the string.
STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
    const machine_int_t scan_backwards = -1;
    return str_partitioner(self_in, arg, scan_backwards);
}
STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) {
if (flags == BUFFER_READ) {
GET_STR_DATA_LEN(self_in, str_data, str_len);
......@@ -542,6 +704,8 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
STATIC const mp_method_t str_type_methods[] = {
{ "find", &str_find_obj },
......@@ -552,6 +716,8 @@ STATIC const mp_method_t str_type_methods[] = {
{ "format", &str_format_obj },
{ "replace", &str_replace_obj },
{ "count", &str_count_obj },
{ "partition", &str_partition_obj },
{ "rpartition", &str_rpartition_obj },
{ NULL, NULL }, // end-of-list sentinel
};
......@@ -559,6 +725,7 @@ const mp_obj_type_t str_type = {
{ &mp_type_type },
.name = MP_QSTR_str,
.print = str_print,
.make_new = str_make_new,
.binary_op = str_binary_op,
.getiter = mp_obj_new_str_iterator,
.methods = str_type_methods,
......@@ -570,34 +737,45 @@ const mp_obj_type_t bytes_type = {
{ &mp_type_type },
.name = MP_QSTR_bytes,
.print = str_print,
.make_new = bytes_make_new,
.binary_op = str_binary_op,
.getiter = mp_obj_new_bytes_iterator,
.methods = str_type_methods,
};
// The zero-length bytes object: a single statically allocated bytes instance
// whose remaining fields are initialised to 0, 0 and NULL.
// It is exported as mp_const_empty_bytes, which bytes_make_new returns when
// called with no arguments.
// NOTE(review): with a pointer-style `data` member the last initialiser leaves
// data == NULL, so consumers must not dereference it — confirm against callers.
STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL};
const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
o->base.type = type;
o->len = len;
*data = o->data;
byte *p = m_new(byte, len + 1);
o->data = p;
*data = p;
return o;
}
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
assert(MP_OBJ_IS_STR(o_in));
mp_obj_str_t *o = o_in;
o->hash = qstr_compute_hash(o->data, o->len);
o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
byte *p = (byte*)o->data;
p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
return o;
}
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
o->base.type = type;
o->hash = qstr_compute_hash(data, len);
o->len = len;
memcpy(o->data, data, len * sizeof(byte));
o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
if (data) {
o->hash = qstr_compute_hash(data, len);
byte *p = m_new(byte, len + 1);
o->data = p;
memcpy(p, data, len * sizeof(byte));
p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
}
return o;
}
......
......@@ -9,139 +9,217 @@
#include "parsenumbase.h"
#include "parsenum.h"
#if defined(UNIX)
#include <ctype.h>
#include <errno.h>
#if MICROPY_ENABLE_FLOAT
#include <math.h>
#endif
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
// TODO at the moment we ignore len; we should honour it!
// TODO detect integer overflow and return bignum
int c, neg = 0;
const char *p = str;
char *num;
long found;
const char *restrict top = str + len;
bool neg = false;
// check radix base
if ((base != 0 && base < 2) || base > 36) {
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "ValueError: int() arg 2 must be >=2 and <= 36"));
}
// skip surrounded whitespace
while (isspace((c = *(p++))));
if (c == 0) {
goto value_error;
// skip leading space
for (; str < top && unichar_isspace(*str); str++) {
}
// preced sign
if (c == '+' || c == '-') {
neg = - (c == '-');
} else {
p--;
// parse optional sign
if (str < top) {
if (*str == '+') {
str++;
} else if (*str == '-') {
str++;
neg = true;
}
}
len -= p - str;
int skip = mp_parse_num_base(p, len, &base);
p += skip;
len -= skip;
// parse optional base prefix
str += mp_parse_num_base(str, top - str, &base);
errno = 0;
found = strtol(p, &num, base);
if (errno) {
goto value_error;
} else if (found && *(num) == 0) {
goto done;
} else if (found || num != p) {
goto check_tail_space;
} else {
// string should be an integer number
machine_int_t int_val = 0;
const char *restrict str_val_start = str;
for (; str < top; str++) {
machine_int_t old_val = int_val;
int dig = *str;
if (unichar_isdigit(dig) && dig - '0' < base) {
// 0-9 digit
int_val = base * int_val + dig - '0';
} else if (base == 16) {
dig |= 0x20;
if ('a' <= dig && dig <= 'f') {
// a-f hex digit
int_val = base * int_val + dig - 'a' + 10;
} else {
// unknown character
break;
}
} else {
// unknown character
break;
}
if (int_val < old_val) {
// If new value became less than previous, it's overflow
goto overflow;
} else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
// If signed number changed sign - it's overflow
goto overflow;
}
}
// check we parsed something
if (str == str_val_start) {
goto value_error;
}
check_tail_space:
if (*(num) != 0) {
while (isspace((c = *(num++))));
if (c != 0) {
goto value_error;
}
// negate value if needed
if (neg) {
int_val = -int_val;
}
// skip trailing space
for (; str < top && unichar_isspace(*str); str++) {
}
// check we reached the end of the string
if (str != top) {
goto value_error;
}
done:
return MP_OBJ_NEW_SMALL_INT((found ^ neg) - neg);
// return the object
return MP_OBJ_NEW_SMALL_INT(int_val);
value_error:
nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "invalid literal for int() with base %d: '%s'", base, str));
}
#else /* defined(UNIX) */
mp_obj_t mp_parse_num_integer(const char *restrict str, uint len, int base) {
// TODO port strtol to stm
return MP_OBJ_NEW_SMALL_INT(0);
overflow:
// TODO reparse using bignum
nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "overflow parsing integer"));
}
#endif /* defined(UNIX) */
#define PARSE_DEC_IN_INTG (1)
#define PARSE_DEC_IN_FRAC (2)
#define PARSE_DEC_IN_EXP (3)
mp_obj_t mp_parse_num_decimal(const char *str, uint len) {
mp_obj_t mp_parse_num_decimal(const char *str, uint len, bool allow_imag, bool force_complex) {
#if MICROPY_ENABLE_FLOAT
int in = PARSE_DEC_IN_INTG;
const char *top = str + len;
mp_float_t dec_val = 0;
bool exp_neg = false;
int exp_val = 0;
int exp_extra = 0;
bool dec_neg = false;
bool imag = false;
const char *top = str + len;
for (; str < top; str++) {
int dig = *str;
if ('0' <= dig && dig <= '9') {
dig -= '0';
if (in == PARSE_DEC_IN_EXP) {
exp_val = 10 * exp_val + dig;
} else {
dec_val = 10 * dec_val + dig;
if (in == PARSE_DEC_IN_FRAC) {
exp_extra -= 1;
}
// skip leading space
for (; str < top && unichar_isspace(*str); str++) {
}
// parse optional sign
if (str < top) {
if (*str == '+') {
str++;
} else if (*str == '-') {
str++;
dec_neg = true;
}
}
// determine what the string is
if (str < top && (str[0] | 0x20) == 'i') {
// string starts with 'i', should be 'inf' or 'infinity' (case insensitive)
if (str + 2 < top && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'f') {
// inf
str += 3;
dec_val = INFINITY;
if (str + 4 < top && (str[0] | 0x20) == 'i' && (str[1] | 0x20) == 'n' && (str[2] | 0x20) == 'i' && (str[3] | 0x20) == 't' && (str[4] | 0x20) == 'y') {
// infinity
str += 5;
}
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
in = PARSE_DEC_IN_FRAC;
} else if (in != PARSE_DEC_IN_EXP && (dig == 'E' || dig == 'e')) {
in = PARSE_DEC_IN_EXP;
if (str[1] == '+') {
str++;
} else if (str[1] == '-') {
}
} else if (str < top && (str[0] | 0x20) == 'n') {
// string starts with 'n', should be 'nan' (case insensitive)
if (str + 2 < top && (str[1] | 0x20) == 'a' && (str[2] | 0x20) == 'n') {
// NaN
str += 3;
dec_val = MICROPY_FLOAT_C_FUN(nan)("");
}
} else {
// string should be a decimal number
int in = PARSE_DEC_IN_INTG;
bool exp_neg = false;
int exp_val = 0;
int exp_extra = 0;
for (; str < top; str++) {
int dig = *str;
if ('0' <= dig && dig <= '9') {
dig -= '0';
if (in == PARSE_DEC_IN_EXP) {
exp_val = 10 * exp_val + dig;
} else {
dec_val = 10 * dec_val + dig;
if (in == PARSE_DEC_IN_FRAC) {
exp_extra -= 1;
}
}
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
in = PARSE_DEC_IN_FRAC;
} else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
in = PARSE_DEC_IN_EXP;
if (str[1] == '+') {
str++;
} else if (str[1] == '-') {
str++;
exp_neg = true;
}
} else if (allow_imag && (dig | 0x20) == 'j') {
str++;