Commit c3bd9415 authored by Damien George's avatar Damien George
Browse files

py: Make qstr hash size configurable, defaults to 2 bytes.

This patch makes configurable, via MICROPY_QSTR_BYTES_IN_HASH, the
number of bytes used for a qstr hash.  It was originally fixed at 2
bytes, and now defaults to 2 bytes.  Setting it to 1 byte will save
ROM and RAM at a small expense of hash collisions.
parent 1e8ca3a3
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
// options to control how Micro Python is built // options to control how Micro Python is built
#define MICROPY_QSTR_BYTES_IN_HASH (1)
#define MICROPY_ALLOC_PATH_MAX (512) #define MICROPY_ALLOC_PATH_MAX (512)
#define MICROPY_EMIT_X64 (0) #define MICROPY_EMIT_X64 (0)
#define MICROPY_EMIT_THUMB (0) #define MICROPY_EMIT_THUMB (0)
......
...@@ -37,12 +37,12 @@ codepoint2name[ord('!')] = 'bang' ...@@ -37,12 +37,12 @@ codepoint2name[ord('!')] = 'bang'
codepoint2name[ord('\\')] = 'backslash' codepoint2name[ord('\\')] = 'backslash'
# this must match the equivalent function in qstr.c # this must match the equivalent function in qstr.c
def compute_hash(qstr): def compute_hash(qstr, bytes_hash):
hash = 5381 hash = 5381
for char in qstr: for char in qstr:
hash = (hash * 33) ^ ord(char) hash = (hash * 33) ^ ord(char)
# Make sure that valid hash is never zero, zero means "hash not computed" # Make sure that valid hash is never zero, zero means "hash not computed"
return (hash & 0xffff) or 1 return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
def do_work(infiles): def do_work(infiles):
# read the qstrs in from the input files # read the qstrs in from the input files
...@@ -81,6 +81,7 @@ def do_work(infiles): ...@@ -81,6 +81,7 @@ def do_work(infiles):
# get config variables # get config variables
cfg_bytes_len = int(qcfgs['BYTES_IN_LEN']) cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
cfg_max_len = 1 << (8 * cfg_bytes_len) cfg_max_len = 1 << (8 * cfg_bytes_len)
# print out the starte of the generated C header file # print out the starte of the generated C header file
...@@ -88,11 +89,11 @@ def do_work(infiles): ...@@ -88,11 +89,11 @@ def do_work(infiles):
print('') print('')
# add NULL qstr with no hash or data # add NULL qstr with no hash or data
print('QDEF(MP_QSTR_NULL, (const byte*)"\\x00\\x00%s" "")' % ('\\x00' * cfg_bytes_len)) print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len))
# go through each qstr and print it out # go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
qhash = compute_hash(qstr) qhash = compute_hash(qstr, cfg_bytes_hash)
# Calculate len of str, taking escapes into account # Calculate len of str, taking escapes into account
qlen = len(qstr.replace("\\\\", "-").replace("\\", "")) qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
qdata = qstr.replace('"', '\\"') qdata = qstr.replace('"', '\\"')
...@@ -100,7 +101,8 @@ def do_work(infiles): ...@@ -100,7 +101,8 @@ def do_work(infiles):
print('qstr is too long:', qstr) print('qstr is too long:', qstr)
assert False assert False
qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len)) qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
print('QDEF(MP_QSTR_%s, (const byte*)"\\x%02x\\x%02x%s" "%s")' % (ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen_str, qdata)) qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
if __name__ == "__main__": if __name__ == "__main__":
do_work(sys.argv[1:]) do_work(sys.argv[1:])
...@@ -152,6 +152,11 @@ ...@@ -152,6 +152,11 @@
#define MICROPY_QSTR_BYTES_IN_LEN (1) #define MICROPY_QSTR_BYTES_IN_LEN (1)
#endif #endif
// Number of bytes used to store qstr hash
#ifndef MICROPY_QSTR_BYTES_IN_HASH
#define MICROPY_QSTR_BYTES_IN_HASH (2)
#endif
// Avoid using C stack when making Python function calls. C stack still // Avoid using C stack when making Python function calls. C stack still
// may be used if there's no free heap. // may be used if there's no free heap.
#ifndef MICROPY_STACKLESS #ifndef MICROPY_STACKLESS
......
...@@ -43,22 +43,31 @@ ...@@ -43,22 +43,31 @@
#endif #endif
// A qstr is an index into the qstr pool. // A qstr is an index into the qstr pool.
// The data for a qstr contains (hash, length, data). // The data for a qstr contains (hash, length, data):
// For now we use very simple encoding, just to get the framework correct: // - hash (configurable number of bytes)
// - hash is 2 bytes (see function below) // - length (configurable number of bytes)
// - length is 2 bytes // - data ("length" number of bytes)
// - data follows // - \0 terminated (so they can be printed using printf)
// - \0 terminated (for now, so they can be printed using printf)
#if MICROPY_QSTR_BYTES_IN_HASH == 1
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8)) #define Q_HASH_MASK (0xff)
#define Q_GET_ALLOC(q) (2 + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1) #define Q_GET_HASH(q) ((mp_uint_t)(q)[0])
#define Q_GET_DATA(q) ((q) + 2 + MICROPY_QSTR_BYTES_IN_LEN) #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0)
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
#define Q_HASH_MASK (0xffff)
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
#define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0)
#else
#error unimplemented qstr hash decoding
#endif
#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN)
#if MICROPY_QSTR_BYTES_IN_LEN == 1 #if MICROPY_QSTR_BYTES_IN_LEN == 1
#define Q_GET_LENGTH(q) ((q)[2]) #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH])
#define Q_SET_LENGTH(q, len) do { (q)[2] = (len); } while (0) #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0)
#elif MICROPY_QSTR_BYTES_IN_LEN == 2 #elif MICROPY_QSTR_BYTES_IN_LEN == 2
#define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8)) #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8))
#define Q_SET_LENGTH(q, len) do { (q)[2] = (len); (q)[3] = (len) >> 8; } while (0) #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0)
#else #else
#error unimplemented qstr length decoding #error unimplemented qstr length decoding
#endif #endif
...@@ -70,7 +79,7 @@ mp_uint_t qstr_compute_hash(const byte *data, mp_uint_t len) { ...@@ -70,7 +79,7 @@ mp_uint_t qstr_compute_hash(const byte *data, mp_uint_t len) {
for (const byte *top = data + len; data < top; data++) { for (const byte *top = data + len; data < top; data++) {
hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
} }
hash &= 0xffff; hash &= Q_HASH_MASK;
// Make sure that valid hash is never zero, zero means "hash not computed" // Make sure that valid hash is never zero, zero means "hash not computed"
if (hash == 0) { if (hash == 0) {
hash++; hash++;
...@@ -156,7 +165,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) { ...@@ -156,7 +165,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
// qstr does not exist in interned pool so need to add it // qstr does not exist in interned pool so need to add it
// compute number of bytes needed to intern this string // compute number of bytes needed to intern this string
mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1; mp_uint_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) { if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
// not enough room at end of previously interned string so try to grow // not enough room at end of previously interned string so try to grow
...@@ -193,11 +202,10 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) { ...@@ -193,11 +202,10 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
// store the interned strings' data // store the interned strings' data
mp_uint_t hash = qstr_compute_hash((const byte*)str, len); mp_uint_t hash = qstr_compute_hash((const byte*)str, len);
q_ptr[0] = hash; Q_SET_HASH(q_ptr, hash);
q_ptr[1] = hash >> 8;
Q_SET_LENGTH(q_ptr, len); Q_SET_LENGTH(q_ptr, len);
memcpy(q_ptr + 2 + MICROPY_QSTR_BYTES_IN_LEN, str, len); memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len);
q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
q = qstr_add(q_ptr); q = qstr_add(q_ptr);
} }
return q; return q;
...@@ -205,7 +213,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) { ...@@ -205,7 +213,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
byte *qstr_build_start(mp_uint_t len, byte **q_ptr) { byte *qstr_build_start(mp_uint_t len, byte **q_ptr) {
assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN))); assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN)));
*q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1); *q_ptr = m_new(byte, MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
Q_SET_LENGTH(*q_ptr, len); Q_SET_LENGTH(*q_ptr, len);
return Q_GET_DATA(*q_ptr); return Q_GET_DATA(*q_ptr);
} }
...@@ -215,9 +223,8 @@ qstr qstr_build_end(byte *q_ptr) { ...@@ -215,9 +223,8 @@ qstr qstr_build_end(byte *q_ptr) {
if (q == 0) { if (q == 0) {
mp_uint_t len = Q_GET_LENGTH(q_ptr); mp_uint_t len = Q_GET_LENGTH(q_ptr);
mp_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len); mp_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len);
q_ptr[0] = hash; Q_SET_HASH(q_ptr, hash);
q_ptr[1] = hash >> 8; q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
q = qstr_add(q_ptr); q = qstr_add(q_ptr);
} else { } else {
m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr)); m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
// qstr configuration passed to makeqstrdata.py of the form QCFG(key, value) // qstr configuration passed to makeqstrdata.py of the form QCFG(key, value)
QCFG(BYTES_IN_LEN, MICROPY_QSTR_BYTES_IN_LEN) QCFG(BYTES_IN_LEN, MICROPY_QSTR_BYTES_IN_LEN)
QCFG(BYTES_IN_HASH, MICROPY_QSTR_BYTES_IN_HASH)
Q() Q()
Q(*) Q(*)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment