Commit 5255255f authored by Damien George's avatar Damien George
Browse files

py: Create str/bytes objects in the parser, not the compiler.

Previous to this patch any non-interned str/bytes objects would create a
special parse node that held a copy of the str/bytes data.  Then in the
compiler this data would be turned into a str/bytes object.  This actually
lead to 2 copies of the data, one in the parse node and one in the object.
The parse node's copy of the data would be freed at the end of the compile
stage but nevertheless it meant that the peak memory usage of the
parse/compile stage was higher than it needed to be (by an amount equal to
the number of bytes in all the non-interned str/bytes objects).

This patch changes the behaviour so that str/bytes objects are created
directly in the parser and the object stored in a const-object parse node
(which already exists for bignum, float and complex const objects).  This
reduces peak RAM usage of the parse/compile stage, simplifies the parser
and compiler, and reduces code size by about 170 bytes on Thumb2 archs,
and by about 300 bytes on Xtensa archs.
parent f62503dc
......@@ -47,8 +47,6 @@ typedef enum {
#include "py/grammar.h"
#undef DEF_RULE
#undef DEF_RULE_NC
PN_string, // special node for non-interned string
PN_bytes, // special node for non-interned bytes
PN_const_object, // special node for a constant, generic Python object
// define rules without a compile function
#define DEF_RULE(rule, comp, kind, ...)
......@@ -1880,8 +1878,6 @@ STATIC void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
} else {
// for non-REPL, evaluate then discard the expression
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0]))
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes)
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)) {
// do nothing with a lonely constant
} else {
......@@ -2600,33 +2596,19 @@ STATIC void compile_atom_expr_await(compiler_t *comp, mp_parse_node_struct_t *pn
}
#endif
STATIC void compile_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
// only create and load the actual str object on the last pass
if (comp->pass != MP_PASS_EMIT) {
EMIT_ARG(load_const_obj, mp_const_none);
} else {
EMIT_ARG(load_const_obj, mp_obj_new_str((const char*)pns->nodes[0], pns->nodes[1], false));
}
}
STATIC void compile_bytes(compiler_t *comp, mp_parse_node_struct_t *pns) {
// only create and load the actual bytes object on the last pass
if (comp->pass != MP_PASS_EMIT) {
EMIT_ARG(load_const_obj, mp_const_none);
} else {
EMIT_ARG(load_const_obj, mp_obj_new_bytes((const byte*)pns->nodes[0], pns->nodes[1]));
}
}
STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
STATIC mp_obj_t get_const_object(mp_parse_node_struct_t *pns) {
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
// nodes are 32-bit pointers, but need to extract 64-bit object
EMIT_ARG(load_const_obj, (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
return (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32);
#else
EMIT_ARG(load_const_obj, (mp_obj_t)pns->nodes[0]);
return (mp_obj_t)pns->nodes[0];
#endif
}
STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
EMIT_ARG(load_const_obj, get_const_object(pns));
}
typedef void (*compile_function_t)(compiler_t*, mp_parse_node_struct_t*);
STATIC const compile_function_t compile_function[] = {
// only define rules with a compile function
......@@ -2637,8 +2619,6 @@ STATIC const compile_function_t compile_function[] = {
#undef c
#undef DEF_RULE
#undef DEF_RULE_NC
compile_string,
compile_bytes,
compile_const_object,
};
......@@ -2891,7 +2871,8 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) {
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0])
&& MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]) == MP_PARSE_NODE_STRING)
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) {
|| (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)
&& MP_OBJ_IS_STR(get_const_object((mp_parse_node_struct_t*)pns->nodes[0])))) {
// compile the doc string
compile_node(comp, pns->nodes[0]);
// store the doc string
......
......@@ -42,8 +42,6 @@ typedef enum {
#include "py/grammar.h"
#undef DEF_RULE
#undef DEF_RULE_NC
PN_string, // special node for non-interned string
PN_bytes, // special node for non-interned bytes
PN_const_object, // special node for a constant, generic Python object
// define rules without a compile function
#define DEF_RULE(rule, comp, kind, ...)
......
......@@ -38,6 +38,7 @@
#include "py/runtime0.h"
#include "py/runtime.h"
#include "py/objint.h"
#include "py/objstr.h"
#include "py/builtin.h"
#if MICROPY_ENABLE_COMPILER
......@@ -75,8 +76,6 @@ enum {
#include "py/grammar.h"
#undef DEF_RULE
#undef DEF_RULE_NC
RULE_string, // special node for non-interned string
RULE_bytes, // special node for non-interned bytes
RULE_const_object, // special node for a constant, generic Python object
// define rules without a compile function
......@@ -123,8 +122,6 @@ STATIC const rule_t *const rules[] = {
#include "py/grammar.h"
#undef DEF_RULE
#undef DEF_RULE_NC
NULL, // RULE_string
NULL, // RULE_bytes
NULL, // RULE_const_object
// define rules without a compile function
......@@ -326,11 +323,7 @@ void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
} else {
// node must be a mp_parse_node_struct_t
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
#else
......@@ -392,21 +385,6 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
parser->result_stack[parser->result_stack_top++] = pn;
}
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) {
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
if (pn == NULL) {
parser->parse_error = PARSE_ERROR_MEMORY;
return MP_PARSE_NODE_NULL;
}
pn->source_line = src_line;
pn->kind_num_nodes = rule_kind | (2 << 8);
char *p = m_new(char, len);
memcpy(p, str, len);
pn->nodes[0] = (uintptr_t)p;
pn->nodes[1] = len;
return (mp_parse_node_t)pn;
}
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
if (pn == NULL) {
......@@ -473,8 +451,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
// qstr exists, make a leaf node
pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
} else {
// not interned, make a node holding a pointer to the string/bytes data
pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
// not interned, make a node holding a pointer to the string/bytes object
mp_obj_t o = mp_obj_new_str_of_type(
lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes,
(const byte*)lex->vstr.buf, lex->vstr.len);
pn = make_node_const_object(parser, lex->tok_line, o);
}
} else {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
......@@ -934,15 +915,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
// this code discards lonely statements, such as doc strings
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
mp_parse_node_t p = peek_result(&parser, 1);
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p))
|| MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) {
pop_result(&parser); // MP_PARSE_NODE_NULL
mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]);
}
}
pop_result(&parser); // const expression (leaf or RULE_const_object)
// Pushing the "pass" rule here will overwrite any RULE_const_object
// entry that was on the result stack, allowing the GC to reclaim
// the memory from the const object when needed.
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
break;
}
......
......@@ -15,13 +15,13 @@
str(str)
[ 8] rule(5) (n=2)
id(c)
[ 8] literal str(a very long str that will not be interned)
[ 8] literal \.\+
[ 9] rule(5) (n=2)
id(d)
bytes(bytes)
[ 10] rule(5) (n=2)
id(e)
[ 10] literal bytes(a very long bytes that will not be interned)
[ 10] literal \.\+
[ 11] rule(5) (n=2)
id(f)
[ 11] literal \.\+
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment