Commit 55baff4c authored by Damien George
Browse files

Revamp qstrs: they now include length and hash.

Can now have null bytes in strings.  Can define ROM qstrs per port using
qstrdefsport.h
parent 91d457a2
......@@ -8,7 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
#include "mpqstr.h"
#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
......@@ -139,8 +139,8 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_callable_obj, mp_builtin_callable);
// Builtin chr(): reads an integer from o_in and, when it lies in 0..0x10ffff,
// returns a string object built from a 1-byte buffer holding that value.
// Otherwise raises ValueError through nlr_jump.
// NOTE(review): this hunk appears to list the two pre-change lines followed by
// their two replacements (both pairs declare `str`); presumably only one pair
// exists in the real function -- confirm against the repository.
static mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
int ord = mp_obj_get_int(o_in);
if (0 <= ord && ord <= 0x10ffff) {
char str[2] = {ord, '\0'};
return mp_obj_new_str(qstr_from_strn_copy(str, 1));
// NOTE(review): ord is accepted up to 0x10ffff but is stored in a single char
// and only 1 byte is passed on, so values that do not fit in a char are not
// preserved.
char str[1] = {ord};
return mp_obj_new_str(qstr_from_strn(str, 1));
} else {
nlr_jump(mp_obj_new_exception_msg(MP_QSTR_ValueError, "chr() arg not in range(0x110000)"));
}
}
......@@ -257,11 +257,12 @@ static mp_obj_t mp_builtin_next(mp_obj_t o) {
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_next_obj, mp_builtin_next);
// Builtin ord(): returns the first byte of the string in o_in as an integer
// object when the string has length 1; otherwise raises TypeError through
// nlr_jump, reporting the length that was found.
// NOTE(review): this hunk appears to interleave the pre-change lines (length
// taken with strlen on qstr_str's result) with their replacements (length
// reported by qstr_data through `len`); presumably only one variant exists in
// the real function -- confirm against the repository.
static mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
const char *str = qstr_str(mp_obj_get_qstr(o_in));
if (strlen(str) == 1) {
uint len;
const byte *str = qstr_data(mp_obj_get_qstr(o_in), &len);
if (len == 1) {
return mp_obj_new_int(str[0]);
} else {
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "ord() expected a character, but string of length %d found", strlen(str)));
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "ord() expected a character, but string of length %d found", len));
}
}
......@@ -304,7 +305,8 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_range_obj, 1, 3, mp_builtin_range
// Builtin repr(): prints o_in in PRINT_REPR mode into a freshly created vstr
// (vstr_printf is used as the print callback) and returns a string object made
// from a qstr that is handed the vstr's buffer.
// NOTE(review): the two return statements look like the pre-change line
// (buffer + alloc) followed by its replacement (buffer + alloc + len);
// presumably only one exists in the real function -- confirm.
static mp_obj_t mp_builtin_repr(mp_obj_t o_in) {
vstr_t *vstr = vstr_new();
mp_obj_print_helper((void (*)(void *env, const char *fmt, ...))vstr_printf, vstr, o_in, PRINT_REPR);
return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
// TODO don't intern this string
return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_repr_obj, mp_builtin_repr);
......@@ -343,7 +345,8 @@ MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
// Builtin str(): prints o_in in PRINT_STR mode into a freshly created vstr
// (vstr_printf is used as the print callback) and returns a string object made
// from a qstr that is handed the vstr's buffer.
// NOTE(review): the two return statements look like the pre-change line
// (buffer + alloc) followed by its replacement (buffer + alloc + len);
// presumably only one exists in the real function -- confirm.
static mp_obj_t mp_builtin_str(mp_obj_t o_in) {
vstr_t *vstr = vstr_new();
mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR);
return mp_obj_new_str(qstr_from_str_take(vstr->buf, vstr->alloc));
// TODO don't intern this string
return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
......@@ -8,6 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "lexerunix.h"
#include "parse.h"
......@@ -19,10 +20,11 @@
#include "builtin.h"
static mp_obj_t mp_builtin_eval(mp_obj_t o_in) {
const char *str = qstr_str(mp_obj_get_qstr(o_in));
uint str_len;
const byte *str = qstr_data(mp_obj_get_qstr(o_in), &str_len);
// create the lexer
mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", str, strlen(str), 0);
mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", (const char*)str, str_len, 0);
// parse the string
qstr parse_exc_id;
......
......@@ -8,6 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "lexerunix.h"
#include "parse.h"
......
......@@ -7,7 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "mpqstr.h"
#include "qstr.h"
#include "obj.h"
#include "runtime.h"
#include "builtin.h"
......@@ -38,8 +38,8 @@ void mp_module_micropython_init(void) {
rt_store_name(MP_QSTR_micropython, m_mp);
#if MICROPY_MEM_STATS
rt_store_attr(m_mp, qstr_from_str_static("mem_total"), (mp_obj_t)&mp_builtin_mem_total_obj);
rt_store_attr(m_mp, qstr_from_str_static("mem_current"), (mp_obj_t)&mp_builtin_mem_current_obj);
rt_store_attr(m_mp, qstr_from_str_static("mem_peak"), (mp_obj_t)&mp_builtin_mem_peak_obj);
rt_store_attr(m_mp, QSTR_FROM_STR_STATIC("mem_total"), (mp_obj_t)&mp_builtin_mem_total_obj);
rt_store_attr(m_mp, QSTR_FROM_STR_STATIC("mem_current"), (mp_obj_t)&mp_builtin_mem_current_obj);
rt_store_attr(m_mp, QSTR_FROM_STR_STATIC("mem_peak"), (mp_obj_t)&mp_builtin_mem_peak_obj);
#endif
}
......@@ -7,7 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "mpqstr.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......@@ -273,8 +273,8 @@ static bool cpython_c_tuple_is_const(mp_parse_node_t pn) {
}
static void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) {
const char *str = qstr_str(qstr);
int len = strlen(str);
uint len;
const byte *str = qstr_data(qstr, &len);
bool has_single_quote = false;
bool has_double_quote = false;
for (int i = 0; i < len; i++) {
......@@ -1169,22 +1169,20 @@ void do_import_name(compiler_t *comp, mp_parse_node_t pn, qstr *q1, qstr *q2) {
int n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
int len = n - 1;
for (int i = 0; i < n; i++) {
len += strlen(qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i])));
len += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
}
char *str = m_new(char, len + 1);
char *str_dest = str;
str[0] = 0;
byte *q_ptr;
byte *str_dest = qstr_build_start(len, &q_ptr);
for (int i = 0; i < n; i++) {
if (i > 0) {
*str_dest++ = '.';
}
const char *str_src = qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
size_t str_src_len = strlen(str_src);
uint str_src_len;
const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len);
memcpy(str_dest, str_src, str_src_len);
str_dest += str_src_len;
}
*str_dest = '\0';
*q2 = qstr_from_str_take(str, len + 1);
*q2 = qstr_build_end(q_ptr);
EMIT(import_name, *q2);
if (is_as) {
for (int i = 1; i < n; i++) {
......@@ -1221,7 +1219,7 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
#if MICROPY_EMIT_CPYTHON
EMIT(load_const_verbatim_str, "('*',)");
#else
EMIT(load_const_str, qstr_from_str_static("*"), false);
EMIT(load_const_str, QSTR_FROM_STR_STATIC("*"), false);
EMIT(build_tuple, 1);
#endif
......@@ -1248,7 +1246,9 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
vstr_printf(vstr, ", ");
}
vstr_printf(vstr, "'");
vstr_printf(vstr, qstr_str(id2));
uint len;
const byte *str = qstr_data(id2, &len);
vstr_add_strn(vstr, (const char*)str, len);
vstr_printf(vstr, "'");
}
if (n == 1) {
......@@ -2128,24 +2128,21 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
printf("SyntaxError: cannot mix bytes and nonbytes literals\n");
return;
}
const char *str = qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
n_bytes += strlen(str);
n_bytes += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
}
// allocate memory for concatenated string/bytes
char *cat_str = m_new(char, n_bytes + 1);
// concatenate string/bytes
char *s_dest = cat_str;
byte *q_ptr;
byte *s_dest = qstr_build_start(n_bytes, &q_ptr);
for (int i = 0; i < n; i++) {
const char *s = qstr_str(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
size_t s_len = strlen(s);
uint s_len;
const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
memcpy(s_dest, s, s_len);
s_dest += s_len;
}
*s_dest = '\0';
qstr q = qstr_build_end(q_ptr);
EMIT(load_const_str, qstr_from_str_take(cat_str, n_bytes + 1), string_kind == MP_PARSE_NODE_BYTES);
EMIT(load_const_str, q, string_kind == MP_PARSE_NODE_BYTES);
}
// pns needs to have 2 nodes, first is lhs of comprehension, second is PN_comp_for node
......@@ -2767,7 +2764,7 @@ void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
assert(MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[1], PN_comp_for));
mp_parse_node_struct_t *pns_comp_for = (mp_parse_node_struct_t*)pns->nodes[1];
qstr qstr_arg = qstr_from_str_static(".0");
qstr qstr_arg = QSTR_FROM_STR_STATIC(".0");
if (comp->pass == PASS_1) {
bool added;
id_info_t *id_info = scope_find_or_add_id(comp->scope_cur, qstr_arg, &added);
......
......@@ -7,6 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......
......@@ -6,6 +6,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......
......@@ -7,6 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......
......@@ -7,6 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......
......@@ -25,6 +25,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......
......@@ -7,7 +7,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "mpqstr.h"
#include "qstr.h"
#include "lexer.h"
#include "parse.h"
#include "scope.h"
......
......@@ -7,6 +7,8 @@
#include <assert.h>
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#define TAB_SIZE (8)
......@@ -593,7 +595,7 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
mp_lexer_t *lex = m_new(mp_lexer_t, 1);
lex->source_name = qstr_from_strn_copy(src_name, strlen(src_name));
lex->source_name = qstr_from_str(src_name);
lex->stream_data = stream_data;
lex->stream_next_char = stream_next_char;
lex->stream_close = stream_close;
......
......@@ -2,6 +2,8 @@
#include <stdio.h>
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
typedef struct _mp_lexer_str_buf_t {
......
......@@ -5,6 +5,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "lexer.h"
#include "lexerunix.h"
......
import argparse
import re
# Keep in sync with the equivalent hash function in qstr.c.
def compute_hash(qstr):
    """Return the 16-bit hash of ``qstr``.

    The hash is the sum of the code points of every character in the
    string, masked down to the low 16 bits.
    """
    total = sum(ord(ch) for ch in qstr)
    return total & 0xffff
def do_work(infiles):
    """Read raw qstr definitions and print the generated qstr data to stdout.

    Every input file is expected to hold one ``Q(identifier)`` entry per
    line.  Blank lines and lines starting with ``//`` are ignored.  An
    identifier that occurs more than once is emitted only once, at the
    position of its first occurrence.

    For each unique identifier a line of the form
    ``Q(name, (const byte*)"<hash lo><hash hi><len lo><len hi>" "name")``
    is printed, where the four escaped bytes are the low/high bytes of the
    hash from ``compute_hash`` and of the identifier length.

    Args:
        infiles: iterable of paths of the input files to read, in order.

    Returns:
        True on success.  False as soon as a malformed line is found; a
        diagnostic naming the file and line number is printed and no qstr
        data is emitted.
    """
    # read the qstrs in from the input files
    qstrs = []
    # a set mirrors the list so the duplicate test is O(1) instead of
    # scanning the whole list for every line; the list keeps the order
    seen = set()
    for infile in infiles:
        with open(infile, 'rt') as f:
            for line_number, line in enumerate(f, 1):
                line = line.strip()

                # ignore blank lines and comments
                if len(line) == 0 or line.startswith('//'):
                    continue

                # verify line is of the correct form
                match = re.match(r'Q\(([0-9A-Za-z_]+)\)$', line)
                if not match:
                    print('({}:{}) bad qstr format, got {}'.format(infile, line_number, line))
                    return False

                # get the qstr value
                qstr = match.group(1)

                # don't add duplicates
                if qstr in seen:
                    continue

                # add the qstr to the list
                seen.add(qstr)
                qstrs.append(qstr)

    # process the qstrs, printing out the generated C header file
    print('// This file was automatically generated by makeqstrdata.py')
    print()
    for qstr in qstrs:
        qhash = compute_hash(qstr)
        qlen = len(qstr)
        print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(qstr, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))

    return True
def main():
    """Command-line entry point.

    Parses one or more input file paths from the command line, passes them
    to ``do_work`` and, if that reports failure, prints a short notice and
    terminates the process with exit status 1.
    """
    arg_parser = argparse.ArgumentParser(description='Process raw qstr file and output qstr data with length, hash and data bytes')
    arg_parser.add_argument('files', nargs='+', help='input file(s)')
    args = arg_parser.parse_args()

    if not do_work(args.files):
        print('exiting with error code')
        # Raise SystemExit directly: the bare exit() helper is injected by
        # the site module and is not available when site is disabled.
        raise SystemExit(1)
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
......@@ -4,6 +4,7 @@
#include "misc.h"
#include "mpconfig.h"
#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "map.h"
......
......@@ -88,14 +88,4 @@ void vstr_printf(vstr_t *vstr, const char *fmt, ...);
void vstr_vprintf(vstr_t *vstr, const char *fmt, va_list ap);
#endif
/** unique string ***********************************************/
typedef unsigned int qstr;
void qstr_init(void);
qstr qstr_from_str_static(const char *str);
qstr qstr_from_str_take(char *str, int alloc_len);
qstr qstr_from_strn_copy(const char *str, int len);
const char* qstr_str(qstr qstr);
#endif // _INCLUDED_MINILIB_H
// See mpqstrraw.h for a list of qstr's that are available as constants.
// Reference them as MP_QSTR_xxxx.
//
// Note: it would be possible to define MP_QSTR_xxx as qstr_from_str_static("xxx")
// for qstrs that are referenced this way, but you don't want to have them in ROM.
enum {
MP_QSTR_nil = 0,
#define Q(id) MP_QSTR_##id,
#include "mpqstrraw.h"
#undef Q
MP_QSTR_number_of,
} category_t;
......@@ -8,7 +8,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
#include "mpqstr.h"
#include "qstr.h"
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
......@@ -268,7 +268,7 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index)
mp_obj_t mp_obj_len_maybe(mp_obj_t o_in) {
mp_small_int_t len = 0;
if (MP_OBJ_IS_TYPE(o_in, &str_type)) {
len = strlen(qstr_str(mp_obj_str_get(o_in)));
len = qstr_len(mp_obj_str_get(o_in));
} else if (MP_OBJ_IS_TYPE(o_in, &tuple_type)) {
uint seq_len;
mp_obj_t *seq_items;
......
......@@ -7,7 +7,7 @@
#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
#include "mpqstr.h"
#include "qstr.h"
#include "obj.h"
#include "map.h"
#include "runtime0.h"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment