Commit 95908b0f authored by Paul Sokolovsky
Browse files

modure: Update to re1.5 v0.6, support for char sets/classes ([a-c]).

parent d27c0bb3
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#if MICROPY_PY_URE #if MICROPY_PY_URE
#include "re1.5/regexp.h" #include "re1.5/re1.5.h"
#define FLAG_DEBUG 0x1000 #define FLAG_DEBUG 0x1000
...@@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = { ...@@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
#include "re1.5/compilecode.c" #include "re1.5/compilecode.c"
#include "re1.5/dumpcode.c" #include "re1.5/dumpcode.c"
#include "re1.5/recursiveloop.c" #include "re1.5/recursiveloop.c"
#include "re1.5/charclass.c"
#endif //MICROPY_PY_URE #endif //MICROPY_PY_URE
#include "re1.5.h"
/*
 * Test whether the subject character *sp belongs to a compiled character
 * class.
 *
 * pc points at the "cnt" byte that follows the class opcode; that byte is
 * followed by cnt (low, high) byte pairs, each describing an inclusive range.
 *
 * Returns 1 if *sp lies inside at least one of the ranges, 0 otherwise
 * (an empty class therefore matches nothing).
 */
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Read the count and range bounds as unsigned bytes: plain char may be
    // signed, which would turn a count or bound >= 0x80 into a negative value.
    const unsigned char *range = (const unsigned char *)pc;
    unsigned char ch = (unsigned char)*sp;
    int cnt = *range++;

    while (cnt--) {
        // A class matches as soon as ANY of its ranges contains the character.
        if (ch >= range[0] && ch <= range[1]) {
            return 1;
        }
        range += 2; // step to the next (low, high) pair
    }
    return 0;
}
\ No newline at end of file
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
#include "regexp.h" #include "re1.5.h"
static void insert_code(char *code, int at, int num, int *pc) static void insert_code(char *code, int at, int num, int *pc)
{ {
...@@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re) ...@@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
break; break;
case ')': case ')':
break; break;
case '[': {
pc += 2;
re++;
while (*re != ']') {
if (!*re) return -1;
if (re[1] == '-') {
re += 2;
}
pc += 2;
re++;
}
}
} }
} }
...@@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog) ...@@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
EMIT(pc++, Any); EMIT(pc++, Any);
prog->len++; prog->len++;
break; break;
case '[': {
int cnt;
term = pc;
EMIT(pc++, Class);
pc++; // Skip # of pair byte
prog->len++;
re++;
for (cnt = 0; *re != ']'; re++, cnt++) {
if (!*re) return NULL;
EMIT(pc++, *re);
if (re[1] == '-') {
re += 2;
}
EMIT(pc++, *re);
}
EMIT(term + 1, cnt);
break;
}
case '(': case '(':
term = pc; term = pc;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
#include "regexp.h" #include "re1.5.h"
void re1_5_dumpcode(ByteProg *prog) void re1_5_dumpcode(ByteProg *prog)
{ {
...@@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog) ...@@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
case Any: case Any:
printf("any\n"); printf("any\n");
break; break;
case Class: {
int num = code[pc++];
printf("class %d", num);
while (num--) {
printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
pc += 2;
}
printf("\n");
break;
}
case Match: case Match:
printf("match\n"); printf("match\n");
break; break;
......
...@@ -80,14 +80,18 @@ enum /* Inst.opcode */ ...@@ -80,14 +80,18 @@ enum /* Inst.opcode */
CONSUMERS = 1, CONSUMERS = 1,
Char = CONSUMERS, Char = CONSUMERS,
Any, Any,
Class,
ASSERTS = 0x50, ASSERTS = 0x50,
Bol = ASSERTS, Bol = ASSERTS,
Eol, Eol,
// Instructions which take relative offset as arg // Instructions which take relative offset as arg
JUMPS = 0x60, JUMPS = 0x60,
Jmp = JUMPS, Jmp = JUMPS,
Split, Split,
RSplit, RSplit,
// Other (special) instructions // Other (special) instructions
Save = 0x7e, Save = 0x7e,
Match = 0x7f, Match = 0x7f,
...@@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re); ...@@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
int re1_5_compilecode(ByteProg *prog, const char *re); int re1_5_compilecode(ByteProg *prog, const char *re);
void re1_5_dumpcode(ByteProg *prog); void re1_5_dumpcode(ByteProg *prog);
void cleanmarks(ByteProg *prog); void cleanmarks(ByteProg *prog);
int _re1_5_classmatch(const char *pc, const char *sp);
#endif /*_RE1_5_REGEXP__H*/ #endif /*_RE1_5_REGEXP__H*/
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
#include "regexp.h" #include "re1.5.h"
static int static int
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp) recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
...@@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n ...@@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
case Any: case Any:
sp++; sp++;
continue; continue;
case Class:
if (!_re1_5_classmatch(pc, sp))
return 0;
pc += *(unsigned char*)pc * 2 + 1;
sp++;
continue;
case Match: case Match:
return 1; return 1;
case Jmp: case Jmp:
......
...@@ -20,6 +20,13 @@ try: ...@@ -20,6 +20,13 @@ try:
except IndexError: except IndexError:
print("IndexError") print("IndexError")
r = re.compile("[a-c]")
m = r.match("a")
print(m.group(0))
m = r.match("d")
print(m)
m = r.match("A")
print(m)
r = re.compile("o+") r = re.compile("o+")
m = r.search("foobar") m = r.search("foobar")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment