Commit 55baff4c authored by Damien George's avatar Damien George
Browse files

Revamp qstrs: they now include length and hash.

Can now have null bytes in strings.  Can define ROM qstrs per port using
qstrdefsport.h
parent 91d457a2
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "nlr.h" #include "nlr.h"
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
#include "qstr.h"
#include "obj.h" #include "obj.h"
#include "runtime0.h" #include "runtime0.h"
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include "nlr.h" #include "nlr.h"
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
#include "mpqstr.h" #include "qstr.h"
#include "obj.h" #include "obj.h"
#include "runtime0.h" #include "runtime0.h"
#include "runtime.h" #include "runtime.h"
...@@ -36,9 +36,30 @@ void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj ...@@ -36,9 +36,30 @@ void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj
mp_obj_str_print_qstr(print, env, self->qstr, kind); mp_obj_str_print_qstr(print, env, self->qstr, kind);
} }
// Length-aware counterpart of strstr: both buffers carry explicit lengths, so
// embedded \0 bytes are compared like any other byte.
// Returns a pointer to the first occurrence of needle inside haystack, or NULL
// when there is none.  An empty needle matches at the start of haystack.
// TODO replace with something more efficient/standard
static const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
    // a needle longer than the haystack can never match; this check also keeps
    // the unsigned subtraction below from wrapping around
    if (hlen < nlen) {
        return NULL;
    }
    const uint last_start = hlen - nlen;
    for (uint start = 0; start <= last_start; start++) {
        // count how many leading needle bytes agree at this offset
        uint matched = 0;
        while (matched < nlen && haystack[start + matched] == needle[matched]) {
            matched++;
        }
        if (matched == nlen) {
            return haystack + start;
        }
    }
    return NULL;
}
mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
mp_obj_str_t *lhs = lhs_in; mp_obj_str_t *lhs = lhs_in;
const char *lhs_str = qstr_str(lhs->qstr); uint lhs_len;
const byte *lhs_data = qstr_data(lhs->qstr, &lhs_len);
switch (op) { switch (op) {
case RT_BINARY_OP_SUBSCR: case RT_BINARY_OP_SUBSCR:
// TODO: need predicate to check for int-like type (bools are such for example) // TODO: need predicate to check for int-like type (bools are such for example)
...@@ -46,31 +67,30 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { ...@@ -46,31 +67,30 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
if (MP_OBJ_IS_SMALL_INT(rhs_in)) { if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
// TODO: This implements byte string access for single index so far // TODO: This implements byte string access for single index so far
// TODO: Handle negative indexes. // TODO: Handle negative indexes.
return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]); return mp_obj_new_int(lhs_data[mp_obj_get_int(rhs_in)]);
#if MICROPY_ENABLE_SLICE #if MICROPY_ENABLE_SLICE
} else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) { } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
machine_int_t start, stop, step; machine_int_t start, stop, step;
mp_obj_slice_get(rhs_in, &start, &stop, &step); mp_obj_slice_get(rhs_in, &start, &stop, &step);
assert(step == 1); assert(step == 1);
int len = strlen(lhs_str);
if (start < 0) { if (start < 0) {
start = len + start; start = lhs_len + start;
if (start < 0) { if (start < 0) {
start = 0; start = 0;
} }
} else if (start > len) { } else if (start > lhs_len) {
start = len; start = lhs_len;
} }
if (stop <= 0) { if (stop <= 0) {
stop = len + stop; stop = lhs_len + stop;
// CPython returns empty string in such case // CPython returns empty string in such case
if (stop < 0) { if (stop < 0) {
stop = start; stop = start;
} }
} else if (stop > len) { } else if (stop > lhs_len) {
stop = len; stop = lhs_len;
} }
return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start)); return mp_obj_new_str(qstr_from_strn((const char*)lhs_data + start, stop - start));
#endif #endif
} else { } else {
// Message doesn't match CPython, but we don't have so much bytes as they // Message doesn't match CPython, but we don't have so much bytes as they
...@@ -82,24 +102,24 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { ...@@ -82,24 +102,24 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
case RT_BINARY_OP_INPLACE_ADD: case RT_BINARY_OP_INPLACE_ADD:
if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) { if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
// add 2 strings // add 2 strings
const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr); uint rhs_len;
size_t lhs_len = strlen(lhs_str); const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len);
size_t rhs_len = strlen(rhs_str); int alloc_len = lhs_len + rhs_len;
int alloc_len = lhs_len + rhs_len + 1; byte *q_ptr;
char *val = m_new(char, alloc_len); byte *val = qstr_build_start(alloc_len, &q_ptr);
memcpy(val, lhs_str, lhs_len); memcpy(val, lhs_data, lhs_len);
memcpy(val + lhs_len, rhs_str, rhs_len); memcpy(val + lhs_len, rhs_data, rhs_len);
val[lhs_len + rhs_len] = '\0'; return mp_obj_new_str(qstr_build_end(q_ptr));
return mp_obj_new_str(qstr_from_str_take(val, alloc_len));
} }
break; break;
case RT_COMPARE_OP_IN: case RT_COMPARE_OP_IN:
case RT_COMPARE_OP_NOT_IN: case RT_COMPARE_OP_NOT_IN:
/* NOTE `a in b` is `b.__contains__(a)` */ /* NOTE `a in b` is `b.__contains__(a)` */
if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) { if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
const char *rhs_str = qstr_str(((mp_obj_str_t*)rhs_in)->qstr); uint rhs_len;
/* FIXME \0 in strs */ const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len);
return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (strstr(lhs_str, rhs_str) == NULL)); return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len) == NULL));
return mp_const_false;
} }
break; break;
} }
...@@ -143,22 +163,22 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) { ...@@ -143,22 +163,22 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
} }
// make joined string // make joined string
char *joined_str = m_new(char, required_len + 1); byte *q_ptr;
char *s_dest = joined_str; byte *s_dest = qstr_build_start(required_len, &q_ptr);
for (int i = 0; i < seq_len; i++) { for (int i = 0; i < seq_len; i++) {
if (i > 0) { if (i > 0) {
memcpy(s_dest, sep_str, sep_len); memcpy(s_dest, sep_str, sep_len);
s_dest += sep_len; s_dest += sep_len;
} }
const char *s2 = qstr_str(mp_obj_str_get(seq_items[i])); uint s2_len;
size_t s2_len = strlen(s2); const byte *s2 = qstr_data(mp_obj_str_get(seq_items[i]), &s2_len);
memcpy(s_dest, s2, s2_len); memcpy(s_dest, s2, s2_len);
s_dest += s2_len; s_dest += s2_len;
} }
*s_dest = '\0'; qstr q = qstr_build_end(q_ptr);
// return joined string // return joined string
return mp_obj_new_str(qstr_from_str_take(joined_str, required_len + 1)); return mp_obj_new_str(q);
bad_arg: bad_arg:
nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's")); nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
...@@ -246,20 +266,14 @@ mp_obj_t str_strip(uint n_args, const mp_obj_t *args) { ...@@ -246,20 +266,14 @@ mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
} }
if (first_good_char_pos == 0 && last_good_char_pos == 0) { if (first_good_char_pos == 0 && last_good_char_pos == 0) {
//string is all whitespace, return '\0' //string is all whitespace, return ''
char *empty = m_new(char, 1); return mp_obj_new_str(MP_QSTR_);
empty[0] = '\0';
return mp_obj_new_str(qstr_from_str_take(empty, 1));
} }
assert(last_good_char_pos >= first_good_char_pos); assert(last_good_char_pos >= first_good_char_pos);
//+1 to accomodate the last character //+1 to accomodate the last character
size_t stripped_len = last_good_char_pos - first_good_char_pos + 1; size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
//+1 to accomodate '\0' return mp_obj_new_str(qstr_from_strn(orig_str + first_good_char_pos, stripped_len));
char *stripped_str = m_new(char, stripped_len + 1);
memcpy(stripped_str, orig_str + first_good_char_pos, stripped_len);
stripped_str[stripped_len] = '\0';
return mp_obj_new_str(qstr_from_str_take(stripped_str, stripped_len + 1));
} }
mp_obj_t str_format(uint n_args, const mp_obj_t *args) { mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
...@@ -288,7 +302,7 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) { ...@@ -288,7 +302,7 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
} }
} }
return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc)); return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
} }
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find); static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
...@@ -339,7 +353,7 @@ mp_obj_t str_it_iternext(mp_obj_t self_in) { ...@@ -339,7 +353,7 @@ mp_obj_t str_it_iternext(mp_obj_t self_in) {
mp_obj_str_it_t *self = self_in; mp_obj_str_it_t *self = self_in;
const char *str = qstr_str(self->str->qstr); const char *str = qstr_str(self->str->qstr);
if (self->cur < strlen(str)) { if (self->cur < strlen(str)) {
mp_obj_t o_out = mp_obj_new_str(qstr_from_strn_copy(str + self->cur, 1)); mp_obj_t o_out = mp_obj_new_str(qstr_from_strn(str + self->cur, 1));
self->cur += 1; self->cur += 1;
return o_out; return o_out;
} else { } else {
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "nlr.h" #include "nlr.h"
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
#include "mpqstr.h" #include "qstr.h"
#include "obj.h" #include "obj.h"
#include "runtime0.h" #include "runtime0.h"
#include "runtime.h" #include "runtime.h"
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "nlr.h" #include "nlr.h"
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
#include "mpqstr.h" #include "qstr.h"
#include "obj.h" #include "obj.h"
#include "map.h" #include "map.h"
#include "runtime0.h" #include "runtime0.h"
...@@ -166,7 +166,7 @@ static mp_obj_t class_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) { ...@@ -166,7 +166,7 @@ static mp_obj_t class_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
if (op_name == NULL) { if (op_name == NULL) {
return MP_OBJ_NULL; return MP_OBJ_NULL;
} }
mp_obj_t member = mp_obj_class_lookup(lhs->base.type, qstr_from_str_static(op_name)); mp_obj_t member = mp_obj_class_lookup(lhs->base.type, QSTR_FROM_STR_STATIC(op_name));
if (member != MP_OBJ_NULL) { if (member != MP_OBJ_NULL) {
return rt_call_function_2(member, lhs_in, rhs_in); return rt_call_function_2(member, lhs_in, rhs_in);
} else { } else {
...@@ -219,7 +219,7 @@ static bool class_store_attr(mp_obj_t self_in, qstr attr, mp_obj_t value) { ...@@ -219,7 +219,7 @@ static bool class_store_attr(mp_obj_t self_in, qstr attr, mp_obj_t value) {
bool class_store_item(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { bool class_store_item(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
mp_obj_class_t *self = self_in; mp_obj_class_t *self = self_in;
mp_obj_t member = mp_obj_class_lookup(self->base.type, qstr_from_str_static("__setitem__")); mp_obj_t member = mp_obj_class_lookup(self->base.type, QSTR_FROM_STR_STATIC("__setitem__"));
if (member != MP_OBJ_NULL) { if (member != MP_OBJ_NULL) {
mp_obj_t args[3] = {self_in, index, value}; mp_obj_t args[3] = {self_in, index, value};
rt_call_function_n_kw(member, 3, 0, args); rt_call_function_n_kw(member, 3, 0, args);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
#include "qstr.h"
#include "obj.h" #include "obj.h"
#include "runtime.h" #include "runtime.h"
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
#include "mpqstr.h" #include "qstr.h"
#include "lexer.h" #include "lexer.h"
#include "parse.h" #include "parse.h"
...@@ -205,7 +205,7 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) { ...@@ -205,7 +205,7 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
const mp_token_t *tok = mp_lexer_cur(lex); const mp_token_t *tok = mp_lexer_cur(lex);
mp_parse_node_t pn; mp_parse_node_t pn;
if (tok->kind == MP_TOKEN_NAME) { if (tok->kind == MP_TOKEN_NAME) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
} else if (tok->kind == MP_TOKEN_NUMBER) { } else if (tok->kind == MP_TOKEN_NUMBER) {
bool dec = false; bool dec = false;
bool small_int = true; bool small_int = true;
...@@ -254,16 +254,16 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) { ...@@ -254,16 +254,16 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
} }
} }
if (dec) { if (dec) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
} else if (small_int && !overflow && MP_FIT_SMALL_INT(int_val)) { } else if (small_int && !overflow && MP_FIT_SMALL_INT(int_val)) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
} else { } else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
} }
} else if (tok->kind == MP_TOKEN_STRING) { } else if (tok->kind == MP_TOKEN_STRING) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn(tok->str, tok->len));
} else if (tok->kind == MP_TOKEN_BYTES) { } else if (tok->kind == MP_TOKEN_BYTES) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
} else { } else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
} }
......
...@@ -25,13 +25,8 @@ endif ...@@ -25,13 +25,8 @@ endif
# default settings; can be overridden in main Makefile # default settings; can be overridden in main Makefile
ifndef PY_SRC PY_SRC ?= ../py
PY_SRC = ../py BUILD ?= build
endif
ifndef BUILD
BUILD = build
endif
# to create the build directory # to create the build directory
...@@ -42,6 +37,10 @@ $(BUILD): ...@@ -42,6 +37,10 @@ $(BUILD):
PY_BUILD = $(BUILD)/py. PY_BUILD = $(BUILD)/py.
# file containing qstr defs for the core Python bit
PY_QSTR_DEFS = $(PY_SRC)/qstrdefs.h
# py object files # py object files
PY_O_BASENAME = \ PY_O_BASENAME = \
...@@ -97,6 +96,7 @@ PY_O_BASENAME = \ ...@@ -97,6 +96,7 @@ PY_O_BASENAME = \
objstr.o \ objstr.o \
objtuple.o \ objtuple.o \
objtype.o \ objtype.o \
objzip.o \
stream.o \ stream.o \
builtin.o \ builtin.o \
builtinimport.o \ builtinimport.o \
...@@ -105,12 +105,21 @@ PY_O_BASENAME = \ ...@@ -105,12 +105,21 @@ PY_O_BASENAME = \
vm.o \ vm.o \
showbc.o \ showbc.o \
repl.o \ repl.o \
objzip.o \
# prepend the build destination prefix to the py object files # prepend the build destination prefix to the py object files
PY_O = $(addprefix $(PY_BUILD), $(PY_O_BASENAME)) PY_O = $(addprefix $(PY_BUILD), $(PY_O_BASENAME))
# qstr data
# qstr.o depends on the generated header, so the header is (re)built before
# qstr.c is compiled.
$(PY_BUILD)qstr.o: $(PY_BUILD)qstrdefs.generated.h
# The generated header is produced by running makeqstrdata.py over the core
# qstr definitions (PY_QSTR_DEFS) followed by $(QSTR_DEFS); the script's
# standard output is redirected into the target file.  It is regenerated
# whenever either definitions list or the script itself changes.
# NOTE(review): QSTR_DEFS is not assigned in this fragment -- presumably the
# including Makefile sets it to its port-specific qstr list; confirm.
$(PY_BUILD)qstrdefs.generated.h: $(PY_QSTR_DEFS) $(QSTR_DEFS) $(PY_SRC)/makeqstrdata.py
$(ECHO) "makeqstrdata $(PY_QSTR_DEFS) $(QSTR_DEFS)"
$(Q)python $(PY_SRC)/makeqstrdata.py $(PY_QSTR_DEFS) $(QSTR_DEFS) > $@
# emitters
$(PY_BUILD)emitnx64.o: $(PY_SRC)/emitnative.c $(PY_SRC)/emit.h mpconfigport.h $(PY_BUILD)emitnx64.o: $(PY_SRC)/emitnative.c $(PY_SRC)/emit.h mpconfigport.h
$(ECHO) "CC $<" $(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -DN_X64 -c -o $@ $< $(Q)$(CC) $(CFLAGS) -DN_X64 -c -o $@ $<
...@@ -119,11 +128,13 @@ $(PY_BUILD)emitnthumb.o: $(PY_SRC)/emitnative.c $(PY_SRC)/emit.h mpconfigport.h ...@@ -119,11 +128,13 @@ $(PY_BUILD)emitnthumb.o: $(PY_SRC)/emitnative.c $(PY_SRC)/emit.h mpconfigport.h
$(ECHO) "CC $<" $(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -DN_THUMB -c -o $@ $< $(Q)$(CC) $(CFLAGS) -DN_THUMB -c -o $@ $<
# general source files
$(PY_BUILD)%.o: $(PY_SRC)/%.S $(PY_BUILD)%.o: $(PY_SRC)/%.S
$(ECHO) "CC $<" $(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -c -o $@ $< $(Q)$(CC) $(CFLAGS) -c -o $@ $<
$(PY_BUILD)%.o: $(PY_SRC)/%.c mpconfigport.h $(PY_BUILD)%.o: $(PY_SRC)/%.c mpconfigport.h $(PY_SRC)/qstr.h $(PY_QSTR_DEFS) $(QSTR_DEFS)
$(ECHO) "CC $<" $(ECHO) "CC $<"
$(Q)$(CC) $(CFLAGS) -c -o $@ $< $(Q)$(CC) $(CFLAGS) -c -o $@ $<
...@@ -141,5 +152,5 @@ $(PY_BUILD)vm.o: $(PY_SRC)/vm.c ...@@ -141,5 +152,5 @@ $(PY_BUILD)vm.o: $(PY_SRC)/vm.c
$(PY_BUILD)parse.o: $(PY_SRC)/grammar.h $(PY_BUILD)parse.o: $(PY_SRC)/grammar.h
$(PY_BUILD)compile.o: $(PY_SRC)/grammar.h $(PY_BUILD)compile.o: $(PY_SRC)/grammar.h
$(PY_BUILD)/emitcpy.o: $(PY_SRC)/emit.h $(PY_BUILD)emitcpy.o: $(PY_SRC)/emit.h
$(PY_BUILD)emitbc.o: $(PY_SRC)/emit.h $(PY_BUILD)emitbc.o: $(PY_SRC)/emit.h
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
#include <string.h> #include <string.h>
#include "misc.h" #include "misc.h"
#include "mpqstr.h" #include "mpconfig.h"
#include "qstr.h"
// NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) // NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings)
// ultimately we will replace this with a static hash table of some kind // ultimately we will replace this with a static hash table of some kind
...@@ -15,12 +16,33 @@ ...@@ -15,12 +16,33 @@
#define DEBUG_printf(args...) (void)0 #define DEBUG_printf(args...) (void)0
#endif #endif
// A qstr is an index into the qstr pool; each pool entry points at a byte
// record laid out as (hash, length, data).
// The encoding is deliberately very simple for now, just to get the framework correct:
//  - bytes 0-1: hash, low byte first (simply the sum of the data bytes)
//  - bytes 2-3: length of the data, low byte first
//  - bytes 4..: the data itself
//  - a trailing \0 (for now, so the data can be printed using printf)
// The accessors below take a pointer to the start of such a record.
#define Q_GET_HASH(q) (((q)[1] << 8) | (q)[0])
#define Q_GET_LENGTH(q) (((q)[3] << 8) | (q)[2])
#define Q_GET_DATA(q) (&(q)[4])
// total size of the record: 4 header bytes + data + 1 terminating \0
#define Q_GET_ALLOC(q) (Q_GET_LENGTH(q) + 4 + 1)
// Hash used for qstr records: the sum of the first len bytes of data, reduced
// to its low 16 bits so that it fits the 2-byte hash field of a record.
// Any byte value, including \0, contributes to the sum.
static machine_uint_t compute_hash(const byte *data, uint len) {
    machine_uint_t sum = 0;
    for (uint i = 0; i < len; i++) {
        sum += data[i];
    }
    return sum & 0xffff;
}
typedef struct _qstr_pool_t { typedef struct _qstr_pool_t {
struct _qstr_pool_t *prev; struct _qstr_pool_t *prev;
uint total_prev_len; uint total_prev_len;
uint alloc; uint alloc;
uint len; uint len;
const char *qstrs[]; const byte *qstrs[];
} qstr_pool_t; } qstr_pool_t;
const static qstr_pool_t const_pool = { const static qstr_pool_t const_pool = {
...@@ -29,9 +51,11 @@ const static qstr_pool_t const_pool = { ...@@ -29,9 +51,11 @@ const static qstr_pool_t const_pool = {
10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below) 10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below)
MP_QSTR_number_of, // corresponds to number of strings in array just below MP_QSTR_number_of, // corresponds to number of strings in array just below
{ {
"nil", // must be first, since 0 qstr is nil (const byte*) "\0\0\0\0", // invalid/no qstr has empty data
#define Q(id) #id, (const byte*) "\0\0\0\0", // empty qstr
#include "mpqstrraw.h" #define Q(id, str) str,
// TODO having 'build/' here is a bit of a hack, should take config variable from Makefile
#include "build/py.qstrdefs.generated.h"
#undef Q #undef Q
}, },
}; };
...@@ -42,8 +66,20 @@ void qstr_init(void) { ...@@ -42,8 +66,20 @@ void qstr_init(void) {
last_pool = (qstr_pool_t*)&const_pool; // we won't modify the const_pool since it has no allocated room left last_pool = (qstr_pool_t*)&const_pool; // we won't modify the const_pool since it has no allocated room left
} }
static qstr qstr_add(const char *str) { static const byte *find_qstr(qstr q) {
DEBUG_printf("QSTR: add %s\n", str); // search pool for this qstr
for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
if (q >= pool->total_prev_len) {
return pool->qstrs[q - pool->total_prev_len];
}
}
// not found
return 0;
}
static qstr qstr_add(const byte *q_ptr) {
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
// make sure we have room in the pool for a new qstr // make sure we have room in the pool for a new qstr
if (last_pool->len >= last_pool->alloc) { if (last_pool->len >= last_pool->alloc) {
...@@ -57,55 +93,95 @@ static qstr qstr_add(const char *str) { ...@@ -57,55 +93,95 @@ static qstr qstr_add(const char *str) {
} }
// add the new qstr // add the new qstr
last_pool->qstrs[last_pool->len++] = str; last_pool->qstrs[last_pool->len++] = q_ptr;
// return id for the newly-added qstr // return id for the newly-added qstr
return last_pool->total_prev_len + last_pool->len - 1; return last_pool->total_prev_len + last_pool->len - 1;
} }
qstr qstr_from_str_static(const char *str) { static qstr qstr_find_strn(const byte *str, uint str_len) {
// work out hash of str
machine_uint_t str_hash = compute_hash((const byte*)str, str_len);