Commit ae436797 authored by Damien George's avatar Damien George
Browse files

py/lexer: Use strcmp to make keyword searching more efficient.

Since the table of keywords is sorted, we can use strcmp to do the search
and stop part way through the search if the comparison is less-than.

Because all tokens that are names are subject to this search, this
optimisation will improve the overall speed of the lexer when processing
a script.

The change also decreases code size by a little bit because we now use
strcmp instead of the custom str_strn_equal function.
parent a68c7546
......@@ -25,6 +25,7 @@
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "py/mpstate.h"
......@@ -39,19 +40,6 @@
// TODO seems that CPython allows NULL byte in the input stream
// don't know if that's intentional or not, but we don't allow it
// TODO replace with a call to a standard function
STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
mp_uint_t i = 0;
while (i < len && *str == *strn) {
++i;
++str;
++strn;
}
return i == len && *str == 0;
}
#define MP_LEXER_EOF ((unichar)MP_READER_EOF)
#define CUR_CHAR(lex) ((lex)->chr0)
......@@ -225,10 +213,12 @@ STATIC const uint8_t tok_enc_kind[] = {
};
// must have the same order as enum in lexer.h
// must be sorted according to strcmp
STATIC const char *const tok_kw[] = {
"False",
"None",
"True",
"__debug__",
"and",
"as",
"assert",
......@@ -263,7 +253,6 @@ STATIC const char *const tok_kw[] = {
"while",
"with",
"yield",
"__debug__",
};
// This is called with CUR_CHAR() before first hex digit, and should return with
......@@ -531,16 +520,18 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
// We also check for __debug__ here and convert it to its value. This is
// so the parser gives a syntax error on, eg, x.__debug__. Otherwise, we
// need to check for this special token in many places in the compiler.
// TODO improve speed of these string comparisons
const char *s = vstr_null_terminated_str(&lex->vstr);
for (size_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
if (str_strn_equal(tok_kw[i], lex->vstr.buf, lex->vstr.len)) {
if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
// tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
int cmp = strcmp(s, tok_kw[i]);
if (cmp == 0) {
lex->tok_kind = MP_TOKEN_KW_FALSE + i;
if (lex->tok_kind == MP_TOKEN_KW___DEBUG__) {
lex->tok_kind = (MP_STATE_VM(mp_optimise_value) == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
} else {
lex->tok_kind = MP_TOKEN_KW_FALSE + i;
}
break;
} else if (cmp < 0) {
// Table is sorted and comparison was less-than, so stop searching
break;
}
}
......
......@@ -61,6 +61,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_FALSE, // 14
MP_TOKEN_KW_NONE,
MP_TOKEN_KW_TRUE,
MP_TOKEN_KW___DEBUG__,
MP_TOKEN_KW_AND,
MP_TOKEN_KW_AS,
MP_TOKEN_KW_ASSERT,
......@@ -71,7 +72,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_BREAK,
MP_TOKEN_KW_CLASS,
MP_TOKEN_KW_CONTINUE,
MP_TOKEN_KW_DEF, // 23
MP_TOKEN_KW_DEF,
MP_TOKEN_KW_DEL,
MP_TOKEN_KW_ELIF,
MP_TOKEN_KW_ELSE,
......@@ -81,7 +82,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_FROM,
MP_TOKEN_KW_GLOBAL,
MP_TOKEN_KW_IF,
MP_TOKEN_KW_IMPORT, // 33
MP_TOKEN_KW_IMPORT,
MP_TOKEN_KW_IN,
MP_TOKEN_KW_IS,
MP_TOKEN_KW_LAMBDA,
......@@ -91,12 +92,12 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_KW_PASS,
MP_TOKEN_KW_RAISE,
MP_TOKEN_KW_RETURN,
MP_TOKEN_KW_TRY, // 43
MP_TOKEN_KW_TRY,
MP_TOKEN_KW_WHILE,
MP_TOKEN_KW_WITH,
MP_TOKEN_KW_YIELD,
MP_TOKEN_OP_PLUS, // 47
MP_TOKEN_OP_PLUS,
MP_TOKEN_OP_MINUS,
MP_TOKEN_OP_STAR,
MP_TOKEN_OP_DBL_STAR,
......@@ -106,7 +107,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_OP_LESS,
MP_TOKEN_OP_DBL_LESS,
MP_TOKEN_OP_MORE,
MP_TOKEN_OP_DBL_MORE, // 57
MP_TOKEN_OP_DBL_MORE,
MP_TOKEN_OP_AMPERSAND,
MP_TOKEN_OP_PIPE,
MP_TOKEN_OP_CARET,
......@@ -116,7 +117,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_OP_DBL_EQUAL,
MP_TOKEN_OP_NOT_EQUAL,
MP_TOKEN_DEL_PAREN_OPEN, // 66
MP_TOKEN_DEL_PAREN_OPEN,
MP_TOKEN_DEL_PAREN_CLOSE,
MP_TOKEN_DEL_BRACKET_OPEN,
MP_TOKEN_DEL_BRACKET_CLOSE,
......@@ -126,7 +127,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_COLON,
MP_TOKEN_DEL_PERIOD,
MP_TOKEN_DEL_SEMICOLON,
MP_TOKEN_DEL_AT, // 76
MP_TOKEN_DEL_AT,
MP_TOKEN_DEL_EQUAL,
MP_TOKEN_DEL_PLUS_EQUAL,
MP_TOKEN_DEL_MINUS_EQUAL,
......@@ -136,7 +137,7 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_DEL_PERCENT_EQUAL,
MP_TOKEN_DEL_AMPERSAND_EQUAL,
MP_TOKEN_DEL_PIPE_EQUAL,
MP_TOKEN_DEL_CARET_EQUAL, // 86
MP_TOKEN_DEL_CARET_EQUAL,
MP_TOKEN_DEL_DBL_MORE_EQUAL,
MP_TOKEN_DEL_DBL_LESS_EQUAL,
MP_TOKEN_DEL_DBL_STAR_EQUAL,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment