Commit 22b22650 authored by Damien George's avatar Damien George
Browse files

py/parse: Improve constant folding to operate on small and big ints.

Constant folding in the parser can now operate on big ints, whatever
their representation.  This is now possible because the parser can create
parse nodes holding arbitrary objects.  For the case of small ints the
folding is still efficient in RAM because the folded small int is stored
in place in the parse node.

Adds 48 bytes to code size on Thumb2 architecture.  Helps reduce heap
usage because more constants can be computed at compile time, leading to
a smaller parse tree, and most importantly means that the constants don't
have to be computed at runtime (perhaps more than once).  Parser will now
be a little slower when folding due to calls to runtime to do the
arithmetic.
parent d6b31e45
...@@ -34,8 +34,9 @@ ...@@ -34,8 +34,9 @@
#include "py/lexer.h" #include "py/lexer.h"
#include "py/parse.h" #include "py/parse.h"
#include "py/parsenum.h" #include "py/parsenum.h"
#include "py/smallint.h" #include "py/runtime0.h"
#include "py/runtime.h" #include "py/runtime.h"
#include "py/objint.h"
#include "py/builtin.h" #include "py/builtin.h"
#if MICROPY_ENABLE_COMPILER #if MICROPY_ENABLE_COMPILER
...@@ -234,6 +235,24 @@ mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg) { ...@@ -234,6 +235,24 @@ mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg) {
return (mp_parse_node_t)(kind | (arg << 4)); return (mp_parse_node_t)(kind | (arg << 4));
} }
// Try to extract an integer object from a parse node.
//
// pn: the parse node to inspect.
// o:  output location; written when pn is a small-int leaf or a
//     RULE_const_object struct node (in the latter case it is written even
//     if the stored object turns out not to be an int).
//
// Returns true when *o holds an integer object (small int leaf, or a constant
// object for which MP_OBJ_IS_INT holds), and false otherwise.
bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o) {
    // Small ints are stored directly in the leaf node: rebox as an object.
    if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        *o = MP_OBJ_NEW_SMALL_INT(MP_PARSE_NODE_LEAF_SMALL_INT(pn));
        return true;
    }

    // Anything other than a constant-object node cannot supply an int.
    if (!MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_const_object)) {
        return false;
    }

    mp_parse_node_struct_t *const_node = (mp_parse_node_struct_t*)pn;
    #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
    // nodes are 32-bit pointers, but the object is 64 bits wide: rebuild it
    // from nodes[0] (low half) and nodes[1] (high half)
    *o = (uint64_t)const_node->nodes[0] | ((uint64_t)const_node->nodes[1] << 32);
    #else
    // the object fits in a single node slot
    *o = (mp_obj_t)const_node->nodes[0];
    #endif

    // The constant may be any object; only report success for integers.
    return MP_OBJ_IS_INT(*o);
}
int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) { int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) {
if (MP_PARSE_NODE_IS_NULL(*pn)) { if (MP_PARSE_NODE_IS_NULL(*pn)) {
*nodes = NULL; *nodes = NULL;
...@@ -445,119 +464,94 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args ...@@ -445,119 +464,94 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
// this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4 // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
// it does not do partial folding, eg 1 + 2 + x -> 3 + x // it does not do partial folding, eg 1 + 2 + x -> 3 + x
mp_int_t arg0; mp_obj_t arg0;
if (rule->rule_id == RULE_expr if (rule->rule_id == RULE_expr
|| rule->rule_id == RULE_xor_expr || rule->rule_id == RULE_xor_expr
|| rule->rule_id == RULE_and_expr) { || rule->rule_id == RULE_and_expr) {
// folding for binary ops: | ^ & // folding for binary ops: | ^ &
mp_parse_node_t pn = peek_result(parser, num_args - 1); mp_parse_node_t pn = peek_result(parser, num_args - 1);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
return false; return false;
} }
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); mp_binary_op_t op;
if (rule->rule_id == RULE_expr) {
op = MP_BINARY_OP_OR;
} else if (rule->rule_id == RULE_xor_expr) {
op = MP_BINARY_OP_XOR;
} else {
op = MP_BINARY_OP_AND;
}
for (ssize_t i = num_args - 2; i >= 0; --i) { for (ssize_t i = num_args - 2; i >= 0; --i) {
pn = peek_result(parser, i); pn = peek_result(parser, i);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { mp_obj_t arg1;
if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
return false; return false;
} }
mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn); arg0 = mp_binary_op(op, arg0, arg1);
if (rule->rule_id == RULE_expr) {
// int | int
arg0 |= arg1;
} else if (rule->rule_id == RULE_xor_expr) {
// int ^ int
arg0 ^= arg1;
} else if (rule->rule_id == RULE_and_expr) {
// int & int
arg0 &= arg1;
}
} }
} else if (rule->rule_id == RULE_shift_expr } else if (rule->rule_id == RULE_shift_expr
|| rule->rule_id == RULE_arith_expr || rule->rule_id == RULE_arith_expr
|| rule->rule_id == RULE_term) { || rule->rule_id == RULE_term) {
// folding for binary ops: << >> + - * / % // // folding for binary ops: << >> + - * / % //
mp_parse_node_t pn = peek_result(parser, num_args - 1); mp_parse_node_t pn = peek_result(parser, num_args - 1);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
return false; return false;
} }
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
for (ssize_t i = num_args - 2; i >= 1; i -= 2) { for (ssize_t i = num_args - 2; i >= 1; i -= 2) {
pn = peek_result(parser, i - 1); pn = peek_result(parser, i - 1);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { mp_obj_t arg1;
if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
return false; return false;
} }
mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i)); mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
if (tok == MP_TOKEN_OP_DBL_LESS) { static const uint8_t token_to_op[] = {
// int << int MP_BINARY_OP_ADD,
if (arg1 >= (mp_int_t)BITS_PER_WORD MP_BINARY_OP_SUBTRACT,
|| arg0 > (MP_SMALL_INT_MAX >> arg1) MP_BINARY_OP_MULTIPLY,
|| arg0 < (MP_SMALL_INT_MIN >> arg1)) { 255,//MP_BINARY_OP_POWER,
return false; 255,//MP_BINARY_OP_TRUE_DIVIDE,
} MP_BINARY_OP_FLOOR_DIVIDE,
arg0 <<= arg1; MP_BINARY_OP_MODULO,
} else if (tok == MP_TOKEN_OP_DBL_MORE) { 255,//MP_BINARY_OP_LESS
// int >> int MP_BINARY_OP_LSHIFT,
if (arg1 >= (mp_int_t)BITS_PER_WORD) { 255,//MP_BINARY_OP_MORE
// Shifting to big amounts is underfined behavior MP_BINARY_OP_RSHIFT,
// in C and is CPU-dependent; propagate sign bit. };
arg1 = BITS_PER_WORD - 1; mp_binary_op_t op = token_to_op[tok - MP_TOKEN_OP_PLUS];
} if (op == 255) {
arg0 >>= arg1;
} else if (tok == MP_TOKEN_OP_PLUS) {
// int + int
arg0 += arg1;
} else if (tok == MP_TOKEN_OP_MINUS) {
// int - int
arg0 -= arg1;
} else if (tok == MP_TOKEN_OP_STAR) {
// int * int
if (mp_small_int_mul_overflow(arg0, arg1)) {
return false;
}
arg0 *= arg1;
} else if (tok == MP_TOKEN_OP_SLASH) {
// int / int
return false; return false;
} else if (tok == MP_TOKEN_OP_PERCENT) { }
// int % int int rhs_sign = mp_obj_int_sign(arg1);
if (arg1 == 0) { if (op <= MP_BINARY_OP_RSHIFT) {
// << and >> can't have negative rhs
if (rhs_sign < 0) {
return false; return false;
} }
arg0 = mp_small_int_modulo(arg0, arg1); } else if (op >= MP_BINARY_OP_FLOOR_DIVIDE) {
} else { // % and // can't have zero rhs
assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be if (rhs_sign == 0) {
// int // int
if (arg1 == 0) {
return false; return false;
} }
arg0 = mp_small_int_floor_divide(arg0, arg1);
}
if (!MP_SMALL_INT_FITS(arg0)) {
return false;
} }
arg0 = mp_binary_op(op, arg0, arg1);
} }
} else if (rule->rule_id == RULE_factor_2) { } else if (rule->rule_id == RULE_factor_2) {
// folding for unary ops: + - ~ // folding for unary ops: + - ~
mp_parse_node_t pn = peek_result(parser, 0); mp_parse_node_t pn = peek_result(parser, 0);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
return false; return false;
} }
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1)); mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
mp_binary_op_t op;
if (tok == MP_TOKEN_OP_PLUS) { if (tok == MP_TOKEN_OP_PLUS) {
// +int op = MP_UNARY_OP_POSITIVE;
} else if (tok == MP_TOKEN_OP_MINUS) { } else if (tok == MP_TOKEN_OP_MINUS) {
// -int op = MP_UNARY_OP_NEGATIVE;
arg0 = -arg0;
if (!MP_SMALL_INT_FITS(arg0)) {
return false;
}
} else { } else {
assert(tok == MP_TOKEN_OP_TILDE); // should be assert(tok == MP_TOKEN_OP_TILDE); // should be
// ~int op = MP_UNARY_OP_INVERT;
arg0 = ~arg0;
} }
arg0 = mp_unary_op(op, arg0);
#if MICROPY_COMP_CONST #if MICROPY_COMP_CONST
} else if (rule->rule_id == RULE_expr_stmt) { } else if (rule->rule_id == RULE_expr_stmt) {
...@@ -625,10 +619,10 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args ...@@ -625,10 +619,10 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
} }
mp_obj_t dest[2]; mp_obj_t dest[2];
mp_load_method_maybe(elem->value, q_attr, dest); mp_load_method_maybe(elem->value, q_attr, dest);
if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) { if (!(MP_OBJ_IS_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
return false; return false;
} }
arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]); arg0 = dest[0];
#endif #endif
} else { } else {
...@@ -640,7 +634,12 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args ...@@ -640,7 +634,12 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
for (size_t i = num_args; i > 0; i--) { for (size_t i = num_args; i > 0; i--) {
pop_result(parser); pop_result(parser);
} }
push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0)); if (MP_OBJ_IS_SMALL_INT(arg0)) {
push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(arg0)));
} else {
// TODO reuse memory for parse node struct?
push_result_node(parser, make_node_const_object(parser, 0, arg0));
}
return true; return true;
} }
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "py/mpconfig.h" #include "py/obj.h"
struct _mp_lexer_t; struct _mp_lexer_t;
...@@ -77,6 +77,7 @@ typedef struct _mp_parse_node_struct_t { ...@@ -77,6 +77,7 @@ typedef struct _mp_parse_node_struct_t {
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8) #define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg); mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg);
bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o);
int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes); int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes);
void mp_parse_node_print(mp_parse_node_t pn, size_t indent); void mp_parse_node_print(mp_parse_node_t pn, size_t indent);
......
...@@ -70,20 +70,22 @@ try: ...@@ -70,20 +70,22 @@ try:
except NotImplementedError: except NotImplementedError:
print('NotImplementedError') print('NotImplementedError')
mpz = 1 << 70
# mpz and with both args negative # mpz and with both args negative
try: try:
-(1<<70) & -2 -mpz & -2
except NotImplementedError: except NotImplementedError:
print('NotImplementedError') print('NotImplementedError')
# mpz or with args opposite sign # mpz or with args opposite sign
try: try:
-(1<<70) | 2 -mpz | 2
except NotImplementedError: except NotImplementedError:
print('NotImplementedError') print('NotImplementedError')
# mpz xor with args opposite sign # mpz xor with args opposite sign
try: try:
-(1<<70) ^ 2 -mpz ^ 2
except NotImplementedError: except NotImplementedError:
print('NotImplementedError') print('NotImplementedError')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment