summaryrefslogtreecommitdiff
path: root/pcre/pcre_jit_compile.c
diff options
context:
space:
mode:
authorSergei Golubchik <serg@mariadb.org>2016-06-21 16:02:35 +0200
committerSergei Golubchik <serg@mariadb.org>2016-06-21 16:02:35 +0200
commit1592fbd332373a110edcc28468cd8dc2b99271ef (patch)
treeea55441a8d982b7f1593d7fb00f0db7d63924381 /pcre/pcre_jit_compile.c
parente7591a1ba94f404a87e65554298574bfa97020f2 (diff)
downloadmariadb-git-1592fbd332373a110edcc28468cd8dc2b99271ef.tar.gz
8.39
Diffstat (limited to 'pcre/pcre_jit_compile.c')
-rw-r--r--pcre/pcre_jit_compile.c2875
1 files changed, 1934 insertions, 941 deletions
diff --git a/pcre/pcre_jit_compile.c b/pcre/pcre_jit_compile.c
index 445de0cbefe..4f15a27ac28 100644
--- a/pcre/pcre_jit_compile.c
+++ b/pcre/pcre_jit_compile.c
@@ -168,13 +168,13 @@ typedef struct jit_arguments {
pcre_uchar *mark_ptr;
void *callout_data;
/* Everything else after. */
- pcre_uint32 limit_match;
+ sljit_u32 limit_match;
int real_offset_count;
int offset_count;
- pcre_uint8 notbol;
- pcre_uint8 noteol;
- pcre_uint8 notempty;
- pcre_uint8 notempty_atstart;
+ sljit_u8 notbol;
+ sljit_u8 noteol;
+ sljit_u8 notempty;
+ sljit_u8 notempty_atstart;
} jit_arguments;
typedef struct executable_functions {
@@ -183,8 +183,8 @@ typedef struct executable_functions {
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
PUBL(jit_callback) callback;
void *userdata;
- pcre_uint32 top_bracket;
- pcre_uint32 limit_match;
+ sljit_u32 top_bracket;
+ sljit_u32 limit_match;
} executable_functions;
typedef struct jump_list {
@@ -277,11 +277,25 @@ typedef struct braminzero_backtrack {
struct sljit_label *matchingpath;
} braminzero_backtrack;
-typedef struct iterator_backtrack {
+typedef struct char_iterator_backtrack {
backtrack_common common;
/* Next iteration. */
struct sljit_label *matchingpath;
-} iterator_backtrack;
+ union {
+ jump_list *backtracks;
+ struct {
+ unsigned int othercasebit;
+ pcre_uchar chr;
+ BOOL enabled;
+ } charpos;
+ } u;
+} char_iterator_backtrack;
+
+typedef struct ref_iterator_backtrack {
+ backtrack_common common;
+ /* Next iteration. */
+ struct sljit_label *matchingpath;
+} ref_iterator_backtrack;
typedef struct recurse_entry {
struct recurse_entry *next;
@@ -321,40 +335,46 @@ typedef struct compiler_common {
/* First byte code. */
pcre_uchar *start;
/* Maps private data offset to each opcode. */
- sljit_si *private_data_ptrs;
+ sljit_s32 *private_data_ptrs;
/* Chain list of read-only data ptrs. */
void *read_only_data_head;
/* Tells whether the capturing bracket is optimized. */
- pcre_uint8 *optimized_cbracket;
+ sljit_u8 *optimized_cbracket;
/* Tells whether the starting offset is a target of then. */
- pcre_uint8 *then_offsets;
+ sljit_u8 *then_offsets;
/* Current position where a THEN must jump. */
then_trap_backtrack *then_trap;
/* Starting offset of private data for capturing brackets. */
- int cbra_ptr;
+ sljit_s32 cbra_ptr;
/* Output vector starting point. Must be divisible by 2. */
- int ovector_start;
+ sljit_s32 ovector_start;
+ /* Points to the starting character of the current match. */
+ sljit_s32 start_ptr;
/* Last known position of the requested byte. */
- int req_char_ptr;
+ sljit_s32 req_char_ptr;
/* Head of the last recursion. */
- int recursive_head_ptr;
- /* First inspected character for partial matching. */
- int start_used_ptr;
+ sljit_s32 recursive_head_ptr;
+ /* First inspected character for partial matching.
+ (Needed for avoiding zero length partial matches.) */
+ sljit_s32 start_used_ptr;
/* Starting pointer for partial soft matches. */
- int hit_start;
- /* End pointer of the first line. */
- int first_line_end;
+ sljit_s32 hit_start;
+ /* Pointer of the match end position. */
+ sljit_s32 match_end_ptr;
/* Points to the marked string. */
- int mark_ptr;
+ sljit_s32 mark_ptr;
/* Recursive control verb management chain. */
- int control_head_ptr;
+ sljit_s32 control_head_ptr;
/* Points to the last matched capture block index. */
- int capture_last_ptr;
- /* Points to the starting position of the current match. */
- int start_ptr;
+ sljit_s32 capture_last_ptr;
+ /* Fast forward skipping byte code pointer. */
+ pcre_uchar *fast_forward_bc_ptr;
+ /* Locals used by fast fail optimization. */
+ sljit_s32 fast_fail_start_ptr;
+ sljit_s32 fast_fail_end_ptr;
/* Flipped and lower case tables. */
- const pcre_uint8 *fcc;
+ const sljit_u8 *fcc;
sljit_sw lcc;
/* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
int mode;
@@ -366,20 +386,20 @@ typedef struct compiler_common {
BOOL has_skip_arg;
/* (*THEN) is found in the pattern. */
BOOL has_then;
- /* Needs to know the start position anytime. */
- BOOL needs_start_ptr;
+ /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
+ BOOL has_skip_in_assert_back;
/* Currently in recurse or negative assert. */
BOOL local_exit;
/* Currently in a positive assert. */
BOOL positive_assert;
/* Newline control. */
int nltype;
- pcre_uint32 nlmax;
- pcre_uint32 nlmin;
+ sljit_u32 nlmax;
+ sljit_u32 nlmin;
int newline;
int bsr_nltype;
- pcre_uint32 bsr_nlmax;
- pcre_uint32 bsr_nlmin;
+ sljit_u32 bsr_nlmax;
+ sljit_u32 bsr_nlmin;
/* Dollar endonly. */
int endonly;
/* Tables. */
@@ -419,6 +439,7 @@ typedef struct compiler_common {
BOOL utf;
#ifdef SUPPORT_UCP
BOOL use_ucp;
+ jump_list *getucd;
#endif
#ifdef COMPILE_PCRE8
jump_list *utfreadchar;
@@ -426,9 +447,6 @@ typedef struct compiler_common {
jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
-#ifdef SUPPORT_UCP
- jump_list *getucd;
-#endif
} compiler_common;
/* For byte_sequence_compare. */
@@ -439,27 +457,27 @@ typedef struct compare_context {
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
int ucharptr;
union {
- sljit_si asint;
- sljit_uh asushort;
+ sljit_s32 asint;
+ sljit_u16 asushort;
#if defined COMPILE_PCRE8
- sljit_ub asbyte;
- sljit_ub asuchars[4];
+ sljit_u8 asbyte;
+ sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
- sljit_uh asuchars[2];
+ sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
- sljit_ui asuchars[1];
+ sljit_u32 asuchars[1];
#endif
} c;
union {
- sljit_si asint;
- sljit_uh asushort;
+ sljit_s32 asint;
+ sljit_u16 asushort;
#if defined COMPILE_PCRE8
- sljit_ub asbyte;
- sljit_ub asuchars[4];
+ sljit_u8 asbyte;
+ sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
- sljit_uh asuchars[2];
+ sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
- sljit_ui asuchars[1];
+ sljit_u32 asuchars[1];
#endif
} oc;
#endif
@@ -501,14 +519,14 @@ the start pointers when the end of the capturing group has not yet reached. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
#if defined COMPILE_PCRE8
-#define MOV_UCHAR SLJIT_MOV_UB
-#define MOVU_UCHAR SLJIT_MOVU_UB
+#define MOV_UCHAR SLJIT_MOV_U8
+#define MOVU_UCHAR SLJIT_MOVU_U8
#elif defined COMPILE_PCRE16
-#define MOV_UCHAR SLJIT_MOV_UH
-#define MOVU_UCHAR SLJIT_MOVU_UH
+#define MOV_UCHAR SLJIT_MOV_U16
+#define MOVU_UCHAR SLJIT_MOVU_U16
#elif defined COMPILE_PCRE32
-#define MOV_UCHAR SLJIT_MOV_UI
-#define MOVU_UCHAR SLJIT_MOVU_UI
+#define MOV_UCHAR SLJIT_MOV_U32
+#define MOVU_UCHAR SLJIT_MOVU_U32
#else
#error Unsupported compiling mode
#endif
@@ -564,11 +582,6 @@ SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
-static int ones_in_half_byte[16] = {
- /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
- /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
-};
-
/* Functions whose might need modification for all new supported opcodes:
next_opcode
check_opcode_types
@@ -780,6 +793,7 @@ static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uch
{
int count;
pcre_uchar *slot;
+pcre_uchar *assert_back_end = cc - 1;
/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
@@ -850,15 +864,19 @@ while (cc < ccend)
cc += 2 + 2 * LINK_SIZE;
break;
+ case OP_ASSERTBACK:
+ slot = bracketend(cc);
+ if (slot > assert_back_end)
+ assert_back_end = slot;
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_THEN_ARG:
common->has_then = TRUE;
common->control_head_ptr = 1;
/* Fall through. */
case OP_PRUNE_ARG:
- common->needs_start_ptr = TRUE;
- /* Fall through. */
-
case OP_MARK:
if (common->mark_ptr == 0)
{
@@ -871,17 +889,20 @@ while (cc < ccend)
case OP_THEN:
common->has_then = TRUE;
common->control_head_ptr = 1;
- /* Fall through. */
+ cc += 1;
+ break;
- case OP_PRUNE:
case OP_SKIP:
- common->needs_start_ptr = TRUE;
+ if (cc < assert_back_end)
+ common->has_skip_in_assert_back = TRUE;
cc += 1;
break;
case OP_SKIP_ARG:
common->control_head_ptr = 1;
common->has_skip_arg = TRUE;
+ if (cc < assert_back_end)
+ common->has_skip_in_assert_back = TRUE;
cc += 1 + 2 + cc[1];
break;
@@ -895,8 +916,189 @@ while (cc < ccend)
return TRUE;
}
+static BOOL is_accelerated_repeat(pcre_uchar *cc)
+{
+switch(*cc)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
+
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_POSSTAR:
+ case OP_POSPLUS:
+
+ case OP_STARI:
+ case OP_MINSTARI:
+ case OP_PLUSI:
+ case OP_MINPLUSI:
+ case OP_POSSTARI:
+ case OP_POSPLUSI:
+
+ case OP_NOTSTAR:
+ case OP_NOTMINSTAR:
+ case OP_NOTPLUS:
+ case OP_NOTMINPLUS:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSPLUS:
+
+ case OP_NOTSTARI:
+ case OP_NOTMINSTARI:
+ case OP_NOTPLUSI:
+ case OP_NOTMINPLUSI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ return TRUE;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
+#else
+ cc += (1 + (32 / sizeof(pcre_uchar)));
+#endif
+
+ switch(*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ return TRUE;
+ }
+ break;
+ }
+return FALSE;
+}
+
+static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
+{
+pcre_uchar *cc = common->start;
+pcre_uchar *end;
+
+/* Skip not repeated brackets. */
+while (TRUE)
+ {
+ switch(*cc)
+ {
+ case OP_SOD:
+ case OP_SOM:
+ case OP_SET_SOM:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ /* Zero width assertions. */
+ cc++;
+ continue;
+ }
+
+ if (*cc != OP_BRA && *cc != OP_CBRA)
+ break;
+
+ end = cc + GET(cc, 1);
+ if (*end != OP_KET || PRIVATE_DATA(end) != 0)
+ return FALSE;
+ if (*cc == OP_CBRA)
+ {
+ if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ return FALSE;
+ cc += IMM2_SIZE;
+ }
+ cc += 1 + LINK_SIZE;
+ }
+
+if (is_accelerated_repeat(cc))
+ {
+ common->fast_forward_bc_ptr = cc;
+ common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
+ *private_data_start += sizeof(sljit_sw);
+ return TRUE;
+ }
+return FALSE;
+}
+
+static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
+{
+ pcre_uchar *next_alt;
+
+ SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
+
+ if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ return;
+
+ next_alt = bracketend(cc) - (1 + LINK_SIZE);
+ if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
+ return;
+
+ do
+ {
+ next_alt = cc + GET(cc, 1);
+
+ cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
+
+ while (TRUE)
+ {
+ switch(*cc)
+ {
+ case OP_SOD:
+ case OP_SOM:
+ case OP_SET_SOM:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ /* Zero width assertions. */
+ cc++;
+ continue;
+ }
+ break;
+ }
+
+ if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
+ detect_fast_fail(common, cc, private_data_start, depth - 1);
+
+ if (is_accelerated_repeat(cc))
+ {
+ common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
+
+ if (common->fast_fail_start_ptr == 0)
+ common->fast_fail_start_ptr = *private_data_start;
+
+ *private_data_start += sizeof(sljit_sw);
+ common->fast_fail_end_ptr = *private_data_start;
+
+ if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
+ return;
+ }
+
+ cc = next_alt;
+ }
+ while (*cc == OP_ALT);
+}
+
static int get_class_iterator_size(pcre_uchar *cc)
{
+sljit_u32 min;
+sljit_u32 max;
switch(*cc)
{
case OP_CRSTAR:
@@ -911,9 +1113,14 @@ switch(*cc)
case OP_CRRANGE:
case OP_CRMINRANGE:
- if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
- return 0;
- return 2;
+ min = GET2(cc, 1);
+ max = GET2(cc, 1 + IMM2_SIZE);
+ if (max == 0)
+ return (*cc == OP_CRRANGE) ? 2 : 1;
+ max -= min;
+ if (max > 2)
+ max = 2;
+ return max;
default:
return 0;
@@ -1186,14 +1393,14 @@ while (cc < ccend)
case OP_CLASS:
case OP_NCLASS:
- size += 1 + 32 / sizeof(pcre_uchar);
space = get_class_iterator_size(cc + size);
+ size = 1 + 32 / sizeof(pcre_uchar);
break;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
- size = GET(cc, 1);
space = get_class_iterator_size(cc + size);
+ size = GET(cc, 1);
break;
#endif
@@ -1406,6 +1613,7 @@ while (cc < ccend)
case OP_CLASS:
case OP_NCLASS:
case OP_XCLASS:
+ case OP_CALLOUT:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
@@ -1990,7 +2198,7 @@ if (save)
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
-static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
+static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
{
pcre_uchar *end = bracketend(cc);
BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
@@ -2113,6 +2321,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
/* May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;
+SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
@@ -2127,6 +2336,8 @@ add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
+
+SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
@@ -2190,6 +2401,18 @@ else
}
}
+static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
+{
+DEFINE_COMPILER;
+sljit_s32 i;
+
+SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
+}
+
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
@@ -2262,7 +2485,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
-OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
+OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
@@ -2277,7 +2500,7 @@ OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
+OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
JUMPHERE(early_quit);
@@ -2310,7 +2533,7 @@ SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
-OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
+OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
/* Store match begin and end. */
@@ -2322,7 +2545,7 @@ OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HA
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
@@ -2330,13 +2553,13 @@ OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
JUMPTO(SLJIT_JUMP, quit);
}
@@ -2573,7 +2796,7 @@ else
JUMPHERE(jump);
}
-static void peek_char(compiler_common *common, pcre_uint32 max)
+static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed. */
@@ -2618,12 +2841,12 @@ if (common->utf)
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
-static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
+static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough
to determine a match. */
-const pcre_uint8 value = nclass ? 0xff : 0;
-const pcre_uint8 *end = bitset + 32;
+const sljit_u8 value = nclass ? 0xff : 0;
+const sljit_u8 *end = bitset + 32;
bitset += 16;
do
@@ -2648,12 +2871,12 @@ SLJIT_ASSERT(common->utf);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
if (full_read)
{
jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(jump);
}
@@ -2661,7 +2884,7 @@ if (full_read)
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
-static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
+static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
@@ -2692,7 +2915,7 @@ if (common->utf)
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
if (update_str_ptr)
- OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
@@ -2716,7 +2939,7 @@ if (common->utf)
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
if (update_str_ptr)
- OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
@@ -2736,7 +2959,7 @@ if (common->utf)
add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
else if (max < 128)
{
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
}
else
@@ -2745,7 +2968,7 @@ if (common->utf)
if (!update_str_ptr)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
- OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
@@ -2815,7 +3038,7 @@ if (common->utf)
{
/* This can be an extra read in some situations, but hopefully
it is needed in most cases. */
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
if (!update_str_ptr)
{
@@ -2827,7 +3050,7 @@ if (common->utf)
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
JUMPHERE(jump2);
}
else
@@ -2842,7 +3065,7 @@ if (common->utf)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif
@@ -3034,7 +3257,7 @@ compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(compare);
@@ -3043,7 +3266,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
/* We only have types for characters less than 256. */
JUMPHERE(jump);
-OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
@@ -3069,26 +3292,26 @@ SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
-OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
-OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
+OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
#endif
-static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
+static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
-struct sljit_jump *nl = NULL;
+struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
@@ -3096,14 +3319,13 @@ jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;
-if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
- common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
+if (!(hascrorlf || (common->match_end_ptr != 0)) &&
+ (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
newlinecheck = TRUE;
-if (firstline)
+if (common->match_end_ptr != 0)
{
/* Search for the end of the first line. */
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
@@ -3116,19 +3338,19 @@ if (firstline)
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
JUMPHERE(end);
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
else
{
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
mainloop = LABEL();
/* Continual stores does not cause data dependency. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
read_char_range(common, common->nlmin, common->nlmax, TRUE);
check_newlinechar(common, common->nltype, &newline, TRUE);
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
JUMPHERE(end);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
set_jumps(newline, LABEL());
}
@@ -3149,7 +3371,7 @@ if (newlinecheck)
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- nl = JUMP(SLJIT_JUMP);
+ end2 = JUMP(SLJIT_JUMP);
}
mainloop = LABEL();
@@ -3172,7 +3394,7 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->utf)
{
singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(singlechar);
}
@@ -3194,51 +3416,52 @@ JUMPHERE(start);
if (newlinecheck)
{
JUMPHERE(end);
- JUMPHERE(nl);
+ JUMPHERE(end2);
}
return mainloop;
}
#define MAX_N_CHARS 16
-#define MAX_N_BYTES 8
+#define MAX_DIFF_CHARS 6
-static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
+static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
{
-pcre_uint8 len = bytes[0];
-int i;
+pcre_uchar i, len;
+len = chars[0];
if (len == 255)
return;
if (len == 0)
{
- bytes[0] = 1;
- bytes[1] = byte;
+ chars[0] = 1;
+ chars[1] = chr;
return;
}
for (i = len; i > 0; i--)
- if (bytes[i] == byte)
+ if (chars[i] == chr)
return;
-if (len >= MAX_N_BYTES - 1)
+if (len >= MAX_DIFF_CHARS - 1)
{
- bytes[0] = 255;
+ chars[0] = 255;
return;
}
len++;
-bytes[len] = byte;
-bytes[0] = len;
+chars[len] = chr;
+chars[0] = len;
}
-static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
+static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
-BOOL last, any, caseless;
+BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
-pcre_uint32 chr, mask;
+sljit_u32 chr; /* Any unicode character. */
+sljit_u8 *bytes, *bytes_end, byte;
pcre_uchar *alternative, *cc_save, *oc;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar othercase[8];
@@ -3257,6 +3480,7 @@ while (TRUE)
last = TRUE;
any = FALSE;
+ class = FALSE;
caseless = FALSE;
switch (*cc)
@@ -3320,7 +3544,7 @@ while (TRUE)
#ifdef SUPPORT_UTF
if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
- max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
+ max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
if (max_chars == 0)
return consumed;
last = FALSE;
@@ -3343,7 +3567,7 @@ while (TRUE)
alternative = cc + GET(cc, 1);
while (*alternative == OP_ALT)
{
- max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
+ max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
if (max_chars == 0)
return consumed;
alternative += GET(alternative, 1);
@@ -3356,18 +3580,17 @@ while (TRUE)
case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
+ if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
+ return consumed;
#endif
- any = TRUE;
- cc += 1 + 32 / sizeof(pcre_uchar);
+ class = TRUE;
break;
case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (common->utf) return consumed;
#endif
- any = TRUE;
- cc += 1 + 32 / sizeof(pcre_uchar);
+ class = TRUE;
break;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
@@ -3382,7 +3605,7 @@ while (TRUE)
case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+ if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
return consumed;
#endif
any = TRUE;
@@ -3391,7 +3614,7 @@ while (TRUE)
case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+ if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
return consumed;
#endif
any = TRUE;
@@ -3400,7 +3623,7 @@ while (TRUE)
case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+ if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
return consumed;
#endif
any = TRUE;
@@ -3423,10 +3646,10 @@ while (TRUE)
cc++;
break;
-#ifdef SUPPORT_UCP
+#ifdef SUPPORT_UTF
case OP_NOTPROP:
case OP_PROP:
-#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#ifndef COMPILE_PCRE32
if (common->utf) return consumed;
#endif
any = TRUE;
@@ -3455,30 +3678,114 @@ while (TRUE)
if (any)
{
-#if defined COMPILE_PCRE8
- mask = 0xff;
-#elif defined COMPILE_PCRE16
- mask = 0xffff;
-#elif defined COMPILE_PCRE32
- mask = 0xffffffff;
-#else
- SLJIT_ASSERT_STOP();
-#endif
+ do
+ {
+ chars[0] = 255;
+
+ consumed++;
+ if (--max_chars == 0)
+ return consumed;
+ chars += MAX_DIFF_CHARS;
+ }
+ while (--repeat > 0);
+
+ repeat = 1;
+ continue;
+ }
+
+ if (class)
+ {
+ bytes = (sljit_u8*) (cc + 1);
+ cc += 1 + 32 / sizeof(pcre_uchar);
+
+ switch (*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPOSSTAR:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
+ max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
+ if (max_chars == 0)
+ return consumed;
+ break;
+
+ default:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
+ repeat = GET2(cc, 1);
+ if (repeat <= 0)
+ return consumed;
+ break;
+ }
do
{
- chars[0] = mask;
- chars[1] = mask;
- bytes[0] = 255;
+ if (bytes[31] & 0x80)
+ chars[0] = 255;
+ else if (chars[0] != 255)
+ {
+ bytes_end = bytes + 32;
+ chr = 0;
+ do
+ {
+ byte = *bytes++;
+ SLJIT_ASSERT((chr & 0x7) == 0);
+ if (byte == 0)
+ chr += 8;
+ else
+ {
+ do
+ {
+ if ((byte & 0x1) != 0)
+ add_prefix_char(chr, chars);
+ byte >>= 1;
+ chr++;
+ }
+ while (byte != 0);
+ chr = (chr + 7) & ~7;
+ }
+ }
+ while (chars[0] != 255 && bytes < bytes_end);
+ bytes = bytes_end - 32;
+ }
consumed++;
if (--max_chars == 0)
return consumed;
- chars += 2;
- bytes += MAX_N_BYTES;
+ chars += MAX_DIFF_CHARS;
}
while (--repeat > 0);
+ switch (*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPOSSTAR:
+ return consumed;
+
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
+ cc++;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
+ if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
+ return consumed;
+ cc += 1 + 2 * IMM2_SIZE;
+ break;
+ }
+
repeat = 1;
continue;
}
@@ -3505,7 +3812,10 @@ while (TRUE)
}
}
else
+ {
caseless = FALSE;
+ othercase[0] = 0; /* Stops compiler warning - PH */
+ }
len_save = len;
cc_save = cc;
@@ -3515,43 +3825,16 @@ while (TRUE)
do
{
chr = *cc;
-#ifdef COMPILE_PCRE32
- if (SLJIT_UNLIKELY(chr == NOTACHAR))
- return consumed;
-#endif
- add_prefix_byte((pcre_uint8)chr, bytes);
+ add_prefix_char(*cc, chars);
- mask = 0;
if (caseless)
- {
- add_prefix_byte((pcre_uint8)*oc, bytes);
- mask = *cc ^ *oc;
- chr |= mask;
- }
-
-#ifdef COMPILE_PCRE32
- if (chars[0] == NOTACHAR && chars[1] == 0)
-#else
- if (chars[0] == NOTACHAR)
-#endif
- {
- chars[0] = chr;
- chars[1] = mask;
- }
- else
- {
- mask |= chars[0] ^ chr;
- chr |= mask;
- chars[0] = chr;
- chars[1] |= mask;
- }
+ add_prefix_char(*oc, chars);
len--;
consumed++;
if (--max_chars == 0)
return consumed;
- chars += 2;
- bytes += MAX_N_BYTES;
+ chars += MAX_DIFF_CHARS;
cc++;
oc++;
}
@@ -3570,163 +3853,576 @@ while (TRUE)
}
}
-static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+static sljit_s32 character_to_int32(pcre_uchar chr)
+{
+sljit_s32 value = (sljit_s32)chr;
+#if defined COMPILE_PCRE8
+#define SSE2_COMPARE_TYPE_INDEX 0
+return (value << 24) | (value << 16) | (value << 8) | value;
+#elif defined COMPILE_PCRE16
+#define SSE2_COMPARE_TYPE_INDEX 1
+return (value << 16) | value;
+#elif defined COMPILE_PCRE32
+#define SSE2_COMPARE_TYPE_INDEX 2
+return value;
+#else
+#error "Unsupported unit width"
+#endif
+}
+
+static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit[3];
+struct sljit_jump *nomatch;
+sljit_u8 instruction[8];
+sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
+BOOL load_twice = FALSE;
+pcre_uchar bit;
+
+bit = char1 ^ char2;
+if (!is_powerof2(bit))
+ bit = 0;
+
+if ((char1 != char2) && bit == 0)
+ load_twice = TRUE;
+
+quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+
+/* First part (unaligned start) */
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
+
+SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
+
+/* MOVD xmm, r/m32 */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6e;
+instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char1 != char2)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
+
+ /* MOVD xmm, r/m32 */
+ instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+/* PSHUFD xmm1, xmm2/m128, imm8 */
+instruction[2] = 0x70;
+instruction[3] = 0xc0 | (2 << 3) | 2;
+instruction[4] = 0;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char1 != char2)
+ {
+ /* PSHUFD xmm1, xmm2/m128, imm8 */
+ instruction[3] = 0xc0 | (3 << 3) | 3;
+ instruction[4] = 0;
+ sljit_emit_op_custom(compiler, instruction, 5);
+ }
+
+OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
+OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+
+/* MOVDQA xmm1, xmm2/m128 */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+if (str_ptr_ind < 8)
+ {
+ instruction[2] = 0x6f;
+ instruction[3] = (0 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ if (load_twice)
+ {
+ instruction[3] = (1 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+ }
+else
+ {
+ instruction[1] = 0x41;
+ instruction[2] = 0x0f;
+ instruction[3] = 0x6f;
+ instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
+ sljit_emit_op_custom(compiler, instruction, 5);
+
+ if (load_twice)
+ {
+ instruction[4] = (1 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 5);
+ }
+ instruction[1] = 0x0f;
+ }
+
+#else
+
+instruction[2] = 0x6f;
+instruction[3] = (0 << 3) | str_ptr_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (load_twice)
+ {
+ instruction[3] = (1 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+#endif
+
+if (bit != 0)
+ {
+ /* POR xmm1, xmm2/m128 */
+ instruction[2] = 0xeb;
+ instruction[3] = 0xc0 | (0 << 3) | 3;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+/* PCMPEQB/W/D xmm1, xmm2/m128 */
+instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+instruction[3] = 0xc0 | (0 << 3) | 2;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (load_twice)
+ {
+ instruction[3] = 0xc0 | (1 << 3) | 3;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+/* PMOVMSKB reg, xmm */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (load_twice)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
+ instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+ OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
+ }
+
+OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+
+nomatch = JUMP(SLJIT_ZERO);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+quit[1] = JUMP(SLJIT_JUMP);
+
+JUMPHERE(nomatch);
+
+start = LABEL();
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+
+/* Second part (aligned) */
+
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+
+/* MOVDQA xmm1, xmm2/m128 */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+if (str_ptr_ind < 8)
+ {
+ instruction[2] = 0x6f;
+ instruction[3] = (0 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ if (load_twice)
+ {
+ instruction[3] = (1 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+ }
+else
+ {
+ instruction[1] = 0x41;
+ instruction[2] = 0x0f;
+ instruction[3] = 0x6f;
+ instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
+ sljit_emit_op_custom(compiler, instruction, 5);
+
+ if (load_twice)
+ {
+ instruction[4] = (1 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 5);
+ }
+ instruction[1] = 0x0f;
+ }
+
+#else
+
+instruction[2] = 0x6f;
+instruction[3] = (0 << 3) | str_ptr_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (load_twice)
+ {
+ instruction[3] = (1 << 3) | str_ptr_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+#endif
+
+if (bit != 0)
+ {
+ /* POR xmm1, xmm2/m128 */
+ instruction[2] = 0xeb;
+ instruction[3] = 0xc0 | (0 << 3) | 3;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+/* PCMPEQB/W/D xmm1, xmm2/m128 */
+instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+instruction[3] = 0xc0 | (0 << 3) | 2;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (load_twice)
+ {
+ instruction[3] = 0xc0 | (1 << 3) | 3;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+/* PMOVMSKB reg, xmm */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (load_twice)
+ {
+ instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+ }
+
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+
+JUMPTO(SLJIT_ZERO, start);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+
+start = LABEL();
+SET_LABEL(quit[0], start);
+SET_LABEL(quit[1], start);
+SET_LABEL(quit[2], start);
+}
+
+#undef SSE2_COMPARE_TYPE_INDEX
+
+#endif
+
+static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
-pcre_uint32 chars[MAX_N_CHARS * 2];
-pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
-pcre_uint8 ones[MAX_N_CHARS];
-int offsets[3];
-pcre_uint32 mask;
-pcre_uint8 *byte_set, *byte_set_end;
-int i, max, from;
-int range_right = -1, range_len = 3 - 1;
-sljit_ub *update_table = NULL;
-BOOL in_range;
-pcre_uint32 rec_count;
+struct sljit_jump *found;
+pcre_uchar mask;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_label *utf_start = NULL;
+struct sljit_jump *utf_quit = NULL;
+#endif
+BOOL has_match_end = (common->match_end_ptr != 0);
-for (i = 0; i < MAX_N_CHARS; i++)
+if (offset > 0)
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
+
+if (has_match_end)
{
- chars[i << 1] = NOTACHAR;
- chars[(i << 1) + 1] = 0;
- bytes[i * MAX_N_BYTES] = 0;
+ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+
+ OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+ if (sljit_x86_is_cmov_available())
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
+ sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
+ }
+#endif
+ {
+ quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+ JUMPHERE(quit);
+ }
}
-rec_count = 10000;
-max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+if (common->utf && offset > 0)
+ utf_start = LABEL();
+#endif
-if (max <= 1)
- return FALSE;
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-for (i = 0; i < max; i++)
+/* SSE2 accelerated first character search. */
+
+if (sljit_x86_is_sse2_available())
{
- mask = chars[(i << 1) + 1];
- ones[i] = ones_in_half_byte[mask & 0xf];
- mask >>= 4;
- while (mask != 0)
+ fast_forward_first_char2_sse2(common, char1, char2);
+
+ SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
+ if (common->mode == JIT_COMPILE)
{
- ones[i] += ones_in_half_byte[mask & 0xf];
- mask >>= 4;
+ /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
+ SLJIT_ASSERT(common->forced_quit_label == NULL);
+ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+ add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf && offset > 0)
+ {
+ SLJIT_ASSERT(common->mode == JIT_COMPILE);
+
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if defined COMPILE_PCRE8
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
+#elif defined COMPILE_PCRE16
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
+#else
+#error "Unknown code width"
+#endif
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ }
+#endif
+
+ if (offset > 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
+ else if (sljit_x86_is_cmov_available())
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+ sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
+ }
+ else
+ {
+ quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
+ JUMPHERE(quit);
+ }
+
+ if (has_match_end)
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+ return;
}
+#endif
+
+quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+
+start = LABEL();
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+
+if (char1 == char2)
+ found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
+else
+ {
+ mask = char1 ^ char2;
+ if (is_powerof2(mask))
+ {
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
+ found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
+ }
+ else
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ found = JUMP(SLJIT_NOT_ZERO);
+ }
+ }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
+
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+if (common->utf && offset > 0)
+ utf_quit = JUMP(SLJIT_JUMP);
+#endif
+
+JUMPHERE(found);
+
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+if (common->utf && offset > 0)
+ {
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if defined COMPILE_PCRE8
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
+#elif defined COMPILE_PCRE16
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
+#else
+#error "Unknown code width"
+#endif
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ JUMPHERE(utf_quit);
+ }
+#endif
+
+JUMPHERE(quit);
+
+if (has_match_end)
+ {
+ quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+ if (offset > 0)
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
+ JUMPHERE(quit);
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+ }
+
+if (offset > 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
+}
+
+static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+struct sljit_jump *match;
+/* bytes[0] represent the number of characters between 0
+and MAX_N_BYTES - 1, 255 represents any character. */
+pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
+sljit_s32 offset;
+pcre_uchar mask;
+pcre_uchar *char_set, *char_set_end;
+int i, max, from;
+int range_right = -1, range_len;
+sljit_u8 *update_table = NULL;
+BOOL in_range;
+sljit_u32 rec_count;
+
+for (i = 0; i < MAX_N_CHARS; i++)
+ chars[i * MAX_DIFF_CHARS] = 0;
+
+rec_count = 10000;
+max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
+
+if (max < 1)
+ return FALSE;
+
in_range = FALSE;
-from = 0; /* Prevent compiler "uninitialized" warning */
+/* Prevent compiler "uninitialized" warning */
+from = 0;
+range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
{
- if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
+ if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
{
range_len = i - from;
range_right = i - 1;
}
- if (i < max && bytes[i * MAX_N_BYTES] < 255)
+ if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
{
+ SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
if (!in_range)
{
in_range = TRUE;
from = i;
}
}
- else if (in_range)
+ else
in_range = FALSE;
}
if (range_right >= 0)
{
- update_table = (sljit_ub *)allocate_read_only_data(common, 256);
+ update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
if (update_table == NULL)
return TRUE;
memset(update_table, IN_UCHARS(range_len), 256);
for (i = 0; i < range_len; i++)
{
- byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
- SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
- byte_set_end = byte_set + byte_set[0];
- byte_set++;
- while (byte_set <= byte_set_end)
+ char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
+ SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
+ char_set_end = char_set + char_set[0];
+ char_set++;
+ while (char_set <= char_set_end)
{
- if (update_table[*byte_set] > IN_UCHARS(i))
- update_table[*byte_set] = IN_UCHARS(i);
- byte_set++;
+ if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
+ update_table[(*char_set) & 0xff] = IN_UCHARS(i);
+ char_set++;
}
}
}
-offsets[0] = -1;
+offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
- if (ones[i] <= 2) {
- offsets[0] = i;
- break;
- }
-
-if (offsets[0] < 0 && range_right < 0)
- return FALSE;
-
-if (offsets[0] >= 0)
{
- /* Scan backward. */
- offsets[1] = -1;
- for (i = max - 1; i > offsets[0]; i--)
- if (ones[i] <= 2 && i != range_right)
- {
- offsets[1] = i;
- break;
- }
-
- /* This case is handled better by fast_forward_first_char. */
- if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
- return FALSE;
-
- offsets[2] = -1;
- /* We only search for a middle character if there is no range check. */
- if (offsets[1] >= 0 && range_right == -1)
+ if (offset == -1)
{
- /* Scan from middle. */
- for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
- if (ones[i] <= 2)
+ if (chars[i * MAX_DIFF_CHARS] <= 2)
+ offset = i;
+ }
+ else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
+ {
+ if (chars[i * MAX_DIFF_CHARS] == 1)
+ offset = i;
+ else
+ {
+ mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
+ if (!is_powerof2(mask))
{
- offsets[2] = i;
- break;
+ mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
+ if (is_powerof2(mask))
+ offset = i;
}
-
- if (offsets[2] == -1)
- {
- for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
- if (ones[i] <= 2)
- {
- offsets[2] = i;
- break;
- }
}
}
+ }
- SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
- SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
-
- chars[0] = chars[offsets[0] << 1];
- chars[1] = chars[(offsets[0] << 1) + 1];
- if (offsets[2] >= 0)
- {
- chars[2] = chars[offsets[2] << 1];
- chars[3] = chars[(offsets[2] << 1) + 1];
- }
- if (offsets[1] >= 0)
- {
- chars[4] = chars[offsets[1] << 1];
- chars[5] = chars[(offsets[1] << 1) + 1];
- }
+if (range_right < 0)
+ {
+ if (offset < 0)
+ return FALSE;
+ SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
+ /* Works regardless the value is 1 or 2. */
+ mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
+ fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
+ return TRUE;
}
+if (range_right == offset)
+ offset = -1;
+
+SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
+
max -= 1;
-if (firstline)
+SLJIT_ASSERT(max > 0);
+if (common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
@@ -3736,68 +4432,86 @@ if (firstline)
else
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+SLJIT_ASSERT(range_right >= 0);
+
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-if (range_right >= 0)
- OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
+OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif
start = LABEL();
quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
-
-if (range_right >= 0)
- {
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
- }
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
-if (offsets[0] >= 0)
+if (offset >= 0)
{
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
- if (offsets[1] >= 0)
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (chars[1] != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
- if (offsets[2] >= 0)
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
-
- if (offsets[1] >= 0)
+ if (chars[offset * MAX_DIFF_CHARS] == 1)
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
+ else
{
- if (chars[5] != 0)
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
- CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
+ mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
+ if (is_powerof2(mask))
+ {
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
+ }
+ else
+ {
+ match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
+ JUMPHERE(match);
+ }
}
+ }
- if (offsets[2] >= 0)
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+if (common->utf && offset != 0)
+ {
+ if (offset < 0)
{
- if (chars[3] != 0)
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ else
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+#if defined COMPILE_PCRE8
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
+#elif defined COMPILE_PCRE16
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
+#else
+#error "Unknown code width"
+#endif
+ if (offset < 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
+#endif
+
+if (offset >= 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPHERE(quit);
-if (firstline)
+if (common->match_end_ptr != 0)
{
if (range_right >= 0)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
if (range_right >= 0)
{
@@ -3812,66 +4526,26 @@ return TRUE;
}
#undef MAX_N_CHARS
-#undef MAX_N_BYTES
+#undef MAX_DIFF_CHARS
-static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
{
-DEFINE_COMPILER;
-struct sljit_label *start;
-struct sljit_jump *quit;
-struct sljit_jump *found;
-pcre_uchar oc, bit;
-
-if (firstline)
- {
- SLJIT_ASSERT(common->first_line_end != 0);
- OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
- }
-
-start = LABEL();
-quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+pcre_uchar oc;
oc = first_char;
if (caseless)
{
oc = TABLE_GET(first_char, common->fcc, first_char);
-#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+#if defined SUPPORT_UCP && !defined COMPILE_PCRE8
if (first_char > 127 && common->utf)
oc = UCD_OTHERCASE(first_char);
#endif
}
-if (first_char == oc)
- found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
-else
- {
- bit = first_char ^ oc;
- if (is_powerof2(bit))
- {
- OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
- found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
- }
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- found = JUMP(SLJIT_NOT_ZERO);
- }
- }
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-JUMPTO(SLJIT_JUMP, start);
-JUMPHERE(found);
-JUMPHERE(quit);
-
-if (firstline)
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+fast_forward_first_char2(common, first_char, oc, 0);
}
-static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
+static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
@@ -3882,11 +4556,10 @@ struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;
-if (firstline)
+if (common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
}
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
@@ -3917,7 +4590,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
JUMPHERE(firstchar);
JUMPHERE(lastchar);
- if (firstline)
+ if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
return;
}
@@ -3955,13 +4628,13 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
JUMPHERE(lastchar);
JUMPHERE(firstchar);
-if (firstline)
+if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
-static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
+static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
-static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
+static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
{
DEFINE_COMPILER;
struct sljit_label *start;
@@ -3972,11 +4645,10 @@ jump_list *matches = NULL;
struct sljit_jump *jump;
#endif
-if (firstline)
+if (common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
}
start = LABEL();
@@ -3996,7 +4668,7 @@ if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE,
#endif
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
found = JUMP(SLJIT_NOT_ZERO);
@@ -4012,7 +4684,7 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->utf)
{
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
}
#elif defined COMPILE_PCRE16
@@ -4034,7 +4706,7 @@ if (matches != NULL)
set_jumps(matches, LABEL());
JUMPHERE(quit);
-if (firstline)
+if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
@@ -4047,7 +4719,7 @@ struct sljit_jump *alreadyfound;
struct sljit_jump *found;
struct sljit_jump *foundoc = NULL;
struct sljit_jump *notfound;
-pcre_uint32 oc, bit;
+sljit_u32 oc, bit;
SLJIT_ASSERT(common->req_char_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
@@ -4184,7 +4856,7 @@ else
if (common->utf)
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
@@ -4229,7 +4901,7 @@ else
if (common->utf)
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
@@ -4245,11 +4917,12 @@ OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOC
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
-static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
+static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
+/* May destroy TMP1. */
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
-pcre_uint8 bit, cbit, all;
+sljit_u8 bit, cbit, all;
int i, byte, length = 0;
bit = bits[0] & 0x1;
@@ -4544,12 +5217,12 @@ OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
-OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
+OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
-OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
+OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
@@ -4574,11 +5247,11 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* This function would be ineffective to do in JIT level. */
-pcre_uint32 c1, c2;
+sljit_u32 c1, c2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *ur;
-const pcre_uint32 *pp;
+const sljit_u32 *pp;
while (src1 < end1)
{
@@ -4638,16 +5311,16 @@ if (context->sourcereg == -1)
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
if (context->length >= 4)
- OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else if (context->length >= 2)
- OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else
#endif
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
if (context->length >= 4)
- OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else
#endif
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
@@ -4689,12 +5362,12 @@ do
#endif
{
if (context->length >= 4)
- OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
else if (context->length >= 2)
- OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
else if (context->length >= 1)
- OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
@@ -4777,6 +5450,8 @@ return cc;
} \
charoffset = (value);
+static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
+
static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
@@ -4793,8 +5468,8 @@ BOOL utf = common->utf;
#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
-int typereg = TMP1, scriptreg = TMP1;
-const pcre_uint32 *other_cases;
+int typereg = TMP1;
+const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif
@@ -4856,6 +5531,14 @@ while (*cc != XCL_END)
switch(*cc)
{
case PT_ANY:
+ /* Any either accepts everything or ignored. */
+ if (cc[-1] == XCL_PROP)
+ {
+ compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
+ if (list == backtracks)
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ return;
+ }
break;
case PT_LAMP:
@@ -4892,10 +5575,10 @@ while (*cc != XCL_END)
}
#endif
}
+SLJIT_ASSERT(compares > 0);
/* We are not necessary in utf mode even in 8 bit mode. */
cc = ccbegin;
-detect_partial_match(common, backtracks);
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
if ((cc[-1] & XCL_HASPROP) == 0)
@@ -4903,11 +5586,11 @@ if ((cc[-1] & XCL_HASPROP) == 0)
if ((cc[-1] & XCL_MAP) != 0)
{
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
+ if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
{
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
@@ -4926,11 +5609,11 @@ if ((cc[-1] & XCL_HASPROP) == 0)
}
else if ((cc[-1] & XCL_MAP) != 0)
{
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UCP
charsaved = TRUE;
#endif
- if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
+ if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
{
#ifdef COMPILE_PCRE8
jump = NULL;
@@ -4940,7 +5623,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
@@ -4951,45 +5634,82 @@ else if ((cc[-1] & XCL_MAP) != 0)
JUMPHERE(jump);
}
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
cc += 32 / sizeof(pcre_uchar);
}
#ifdef SUPPORT_UCP
-/* Simple register allocation. TMP1 is preferred if possible. */
if (needstype || needsscript)
{
if (needschar && !charsaved)
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
- add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
- if (needschar)
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+
+ OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
+ OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
+
+ /* Before anything else, we deal with scripts. */
+ if (needsscript)
{
- if (needstype)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+
+ ccbegin = cc;
+
+ while (*cc != XCL_END)
{
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
- typereg = RETURN_ADDR;
+ if (*cc == XCL_SINGLE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ }
+ else if (*cc == XCL_RANGE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ GETCHARINCTEST(c, cc);
+ }
+ else
+ {
+ SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+ cc++;
+ if (*cc == PT_SC)
+ {
+ compares--;
+ invertcmp = (compares == 0 && list != backtracks);
+ if (cc[-1] == XCL_NOTPROP)
+ invertcmp ^= 0x1;
+ jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
+ add_jump(compiler, compares > 0 ? list : backtracks, jump);
+ }
+ cc += 2;
+ }
}
- if (needsscript)
- scriptreg = TMP3;
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ cc = ccbegin;
}
- else if (needstype && needsscript)
- scriptreg = TMP3;
- /* In all other cases only one of them was specified, and that can goes to TMP1. */
- if (needsscript)
+ if (needschar)
{
- if (scriptreg == TMP1)
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+ }
+
+ if (needstype)
+ {
+ if (!needschar)
{
- OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
- OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
}
else
{
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
- OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
+ OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ typereg = RETURN_ADDR;
}
}
}
@@ -5061,20 +5781,15 @@ while (*cc != XCL_END)
#ifdef SUPPORT_UCP
else
{
+ SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
if (*cc == XCL_NOTPROP)
invertcmp ^= 0x1;
cc++;
switch(*cc)
{
case PT_ANY:
- if (list != backtracks)
- {
- if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
- continue;
- }
- else if (cc[-1] == XCL_NOTPROP)
- continue;
- jump = JUMP(SLJIT_JUMP);
+ if (!invertcmp)
+ jump = JUMP(SLJIT_JUMP);
break;
case PT_LAMP:
@@ -5098,7 +5813,8 @@ while (*cc != XCL_END)
break;
case PT_SC:
- jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
+ compares++;
+ /* Do nothing. */
break;
case PT_SPACE:
@@ -5264,6 +5980,10 @@ while (*cc != XCL_END)
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
+
+ default:
+ SLJIT_ASSERT_STOP();
+ break;
}
cc += 2;
}
@@ -5282,19 +6002,13 @@ if (found != NULL)
#endif
-static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
+static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
-unsigned int c, oc, bit;
-compare_context context;
struct sljit_jump *jump[4];
-jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
-#ifdef SUPPORT_UCP
-pcre_uchar propdata[5];
-#endif
#endif /* SUPPORT_UTF */
switch(type)
@@ -5317,12 +6031,220 @@ switch(type)
add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
+ case OP_EODN:
+ /* Requires rather complex checks. */
+ jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ if (common->mode == JIT_COMPILE)
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
+ else
+ {
+ jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
+ check_partial(common, TRUE);
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ JUMPHERE(jump[1]);
+ }
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+ }
+ else if (common->nltype == NLTYPE_FIXED)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
+ }
+ else
+ {
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ jump[2] = JUMP(SLJIT_GREATER);
+ add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
+ /* Equal. */
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+ JUMPHERE(jump[1]);
+ if (common->nltype == NLTYPE_ANYCRLF)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+ }
+ else
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+ add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+ add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+ }
+ JUMPHERE(jump[2]);
+ JUMPHERE(jump[3]);
+ }
+ JUMPHERE(jump[0]);
+ check_partial(common, FALSE);
+ return cc;
+
+ case OP_EOD:
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ check_partial(common, FALSE);
+ return cc;
+
+ case OP_DOLL:
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+
+ if (!common->endonly)
+ compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
+ else
+ {
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ check_partial(common, FALSE);
+ }
+ return cc;
+
+ case OP_DOLLM:
+ jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ check_partial(common, FALSE);
+ jump[0] = JUMP(SLJIT_JUMP);
+ JUMPHERE(jump[1]);
+
+ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+ {
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ if (common->mode == JIT_COMPILE)
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
+ else
+ {
+ jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
+ /* STR_PTR = STR_END - IN_UCHARS(1) */
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ check_partial(common, TRUE);
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ JUMPHERE(jump[1]);
+ }
+
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+ }
+ else
+ {
+ peek_char(common, common->nlmax);
+ check_newlinechar(common, common->nltype, backtracks, FALSE);
+ }
+ JUMPHERE(jump[0]);
+ return cc;
+
+ case OP_CIRC:
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ return cc;
+
+ case OP_CIRCM:
+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+ jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ jump[0] = JUMP(SLJIT_JUMP);
+ JUMPHERE(jump[1]);
+
+ add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+ {
+ OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+ add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+ }
+ else
+ {
+ skip_char_back(common);
+ read_char_range(common, common->nlmin, common->nlmax, TRUE);
+ check_newlinechar(common, common->nltype, backtracks, FALSE);
+ }
+ JUMPHERE(jump[0]);
+ return cc;
+
+ case OP_REVERSE:
+ length = GET(cc, 0);
+ if (length == 0)
+ return cc + LINK_SIZE;
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+#ifdef SUPPORT_UTF
+ if (common->utf)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
+ label = LABEL();
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
+ skip_char_back(common);
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+ else
+#endif
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+ add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
+ }
+ check_start_used_ptr(common);
+ return cc + LINK_SIZE;
+ }
+SLJIT_ASSERT_STOP();
+return cc;
+}
+
+static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
+{
+DEFINE_COMPILER;
+int length;
+unsigned int c, oc, bit;
+compare_context context;
+struct sljit_jump *jump[3];
+jump_list *end_list;
+#ifdef SUPPORT_UTF
+struct sljit_label *label;
+#ifdef SUPPORT_UCP
+pcre_uchar propdata[5];
+#endif
+#endif /* SUPPORT_UTF */
+
+switch(type)
+ {
case OP_NOT_DIGIT:
case OP_DIGIT:
/* Digits are usually 0-9, so it is worth to optimize them. */
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
+ if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
read_char7_type(common, type == OP_NOT_DIGIT);
else
#endif
@@ -5334,9 +6256,10 @@ switch(type)
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
+ if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
read_char7_type(common, type == OP_NOT_WHITESPACE);
else
#endif
@@ -5347,9 +6270,10 @@ switch(type)
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
+ if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
read_char7_type(common, type == OP_NOT_WORDCHAR);
else
#endif
@@ -5359,7 +6283,8 @@ switch(type)
return cc;
case OP_ANY:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, common->nlmin, common->nlmax, TRUE);
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
@@ -5380,7 +6305,8 @@ switch(type)
return cc;
case OP_ALLANY:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
if (common->utf)
{
@@ -5389,7 +6315,7 @@ switch(type)
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
@@ -5408,7 +6334,8 @@ switch(type)
return cc;
case OP_ANYBYTE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
return cc;
@@ -5421,13 +6348,16 @@ switch(type)
propdata[2] = cc[0];
propdata[3] = cc[1];
propdata[4] = XCL_END;
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
compile_xclass_matchingpath(common, propdata, backtracks);
return cc + 2;
#endif
#endif
case OP_ANYNL:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* We don't need to handle soft partial matching case. */
@@ -5449,7 +6379,8 @@ switch(type)
case OP_NOT_HSPACE:
case OP_HSPACE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
@@ -5457,7 +6388,8 @@ switch(type)
case OP_NOT_VSPACE:
case OP_VSPACE:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
@@ -5465,13 +6397,14 @@ switch(type)
#ifdef SUPPORT_UCP
case OP_EXTUNI:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
read_char(common);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
/* Optimize register allocation: use a real register. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
- OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+ OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
label = LABEL();
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
@@ -5479,10 +6412,10 @@ switch(type)
read_char(common);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
- OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
+ OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
@@ -5502,173 +6435,14 @@ switch(type)
return cc;
#endif
- case OP_EODN:
- /* Requires rather complex checks. */
- jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (common->mode == JIT_COMPILE)
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
- else
- {
- jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
- check_partial(common, TRUE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- }
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else if (common->nltype == NLTYPE_FIXED)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
- }
- else
- {
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- jump[2] = JUMP(SLJIT_GREATER);
- add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
- /* Equal. */
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
-
- JUMPHERE(jump[1]);
- if (common->nltype == NLTYPE_ANYCRLF)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
- }
- else
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
- add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
- add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
- }
- JUMPHERE(jump[2]);
- JUMPHERE(jump[3]);
- }
- JUMPHERE(jump[0]);
- check_partial(common, FALSE);
- return cc;
-
- case OP_EOD:
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
- check_partial(common, FALSE);
- return cc;
-
- case OP_CIRC:
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- return cc;
-
- case OP_CIRCM:
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
- jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- jump[0] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[1]);
-
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else
- {
- skip_char_back(common);
- read_char_range(common, common->nlmin, common->nlmax, TRUE);
- check_newlinechar(common, common->nltype, backtracks, FALSE);
- }
- JUMPHERE(jump[0]);
- return cc;
-
- case OP_DOLL:
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
-
- if (!common->endonly)
- compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
- else
- {
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
- check_partial(common, FALSE);
- }
- return cc;
-
- case OP_DOLLM:
- jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
- check_partial(common, FALSE);
- jump[0] = JUMP(SLJIT_JUMP);
- JUMPHERE(jump[1]);
-
- if (common->nltype == NLTYPE_FIXED && common->newline > 255)
- {
- OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
- if (common->mode == JIT_COMPILE)
- add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
- else
- {
- jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
- /* STR_PTR = STR_END - IN_UCHARS(1) */
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- check_partial(common, TRUE);
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- JUMPHERE(jump[1]);
- }
-
- OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
- add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
- }
- else
- {
- peek_char(common, common->nlmax);
- check_newlinechar(common, common->nltype, backtracks, FALSE);
- }
- JUMPHERE(jump[0]);
- return cc;
-
case OP_CHAR:
case OP_CHARI:
length = 1;
#ifdef SUPPORT_UTF
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
- if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
+ if (common->mode == JIT_COMPILE && check_str_ptr
+ && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
{
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
@@ -5681,7 +6455,8 @@ switch(type)
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
}
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
if (common->utf)
{
@@ -5713,7 +6488,8 @@ switch(type)
case OP_NOT:
case OP_NOTI:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
length = 1;
#ifdef SUPPORT_UTF
if (common->utf)
@@ -5722,7 +6498,7 @@ switch(type)
c = *cc;
if (c < 128)
{
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
if (type == OP_NOT || !char_has_othercase(common, cc))
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
else
@@ -5774,16 +6550,17 @@ switch(type)
case OP_CLASS:
case OP_NCLASS:
- detect_partial_match(common, backtracks);
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
+ bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
read_char_range(common, 0, bit, type == OP_NCLASS);
#else
read_char_range(common, 0, 255, type == OP_NCLASS);
#endif
- if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
+ if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
return cc + 32 / sizeof(pcre_uchar);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
@@ -5808,7 +6585,7 @@ switch(type)
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
@@ -5817,40 +6594,15 @@ switch(type)
if (jump[0] != NULL)
JUMPHERE(jump[0]);
#endif
-
return cc + 32 / sizeof(pcre_uchar);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
+ if (check_str_ptr)
+ detect_partial_match(common, backtracks);
compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
return cc + GET(cc, 0) - 1;
#endif
-
- case OP_REVERSE:
- length = GET(cc, 0);
- if (length == 0)
- return cc + LINK_SIZE;
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-#ifdef SUPPORT_UTF
- if (common->utf)
- {
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
- label = LABEL();
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
- skip_char_back(common);
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
-#endif
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
- }
- check_start_used_ptr(common);
- return cc + LINK_SIZE;
}
SLJIT_ASSERT_STOP();
return cc;
@@ -5919,7 +6671,7 @@ if (context.length > 0)
}
/* A non-fixed length character will be checked if length == 0. */
-return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
+return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
}
/* Forward definitions. */
@@ -6094,7 +6846,7 @@ pcre_uchar *ccbegin = cc;
int min = 0, max = 0;
BOOL minimize;
-PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
if (ref)
offset = GET2(cc, 1) << 1;
@@ -6214,7 +6966,7 @@ if (!minimize)
}
JUMPHERE(zerolength);
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
count_match(common);
return cc;
@@ -6260,7 +7012,7 @@ else
}
}
-BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
@@ -6274,7 +7026,7 @@ if (min > 1)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
}
else if (max > 0)
OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
@@ -6419,8 +7171,8 @@ allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
/* These pointer sized fields temporarly stores internal variables. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
@@ -6429,8 +7181,8 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
+OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
/* Needed to save important temporary registers. */
@@ -6438,7 +7190,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
-OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
+OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
@@ -6455,6 +7207,32 @@ return cc + 2 + 2 * LINK_SIZE;
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
+static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
+{
+while (TRUE)
+ {
+ switch (*cc)
+ {
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ case OP_CALLOUT:
+ case OP_ALT:
+ cc += PRIV(OP_lengths)[*cc];
+ break;
+
+ case OP_KET:
+ return FALSE;
+
+ default:
+ return TRUE;
+ }
+ }
+}
+
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
@@ -6511,15 +7289,28 @@ if (bra == OP_BRAMINZERO)
if (framesize < 0)
{
- extrasize = needs_control_head ? 2 : 1;
+ extrasize = 1;
+ if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
+ extrasize = 0;
+
+ if (needs_control_head)
+ extrasize++;
+
if (framesize == no_frame)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
- allocate_stack(common, extrasize);
+
+ if (extrasize > 0)
+ allocate_stack(common, extrasize);
+
if (needs_control_head)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
+ if (extrasize > 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
if (needs_control_head)
{
+ SLJIT_ASSERT(extrasize == 2);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
}
@@ -6528,12 +7319,14 @@ else
{
extrasize = needs_control_head ? 3 : 2;
allocate_stack(common, framesize + extrasize);
+
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
if (needs_control_head)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
if (needs_control_head)
{
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
@@ -6542,6 +7335,7 @@ else
}
else
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+
init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
}
@@ -6565,7 +7359,7 @@ while (1)
altbacktrack.top = NULL;
altbacktrack.topbacktracks = NULL;
- if (*ccbegin == OP_ALT)
+ if (*ccbegin == OP_ALT && extrasize > 0)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
altbacktrack.cc = ccbegin;
@@ -6594,8 +7388,9 @@ while (1)
{
if (framesize == no_frame)
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- else
+ else if (extrasize > 0)
free_stack(common, extrasize);
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
}
@@ -6621,7 +7416,10 @@ while (1)
{
/* We know that STR_PTR was stored on the top of the stack. */
if (conditional)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
+ {
+ if (extrasize > 0)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
+ }
else if (bra == OP_BRAZERO)
{
if (framesize < 0)
@@ -6698,7 +7496,7 @@ if (needs_control_head)
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
/* Assert is failed. */
- if (conditional || bra == OP_BRAZERO)
+ if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
if (framesize < 0)
@@ -6710,7 +7508,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
free_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
- else
+ else if (extrasize > 0)
free_stack(common, extrasize);
}
else
@@ -6735,7 +7533,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
if (framesize < 0)
{
/* We know that STR_PTR was stored on the top of the stack. */
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+ if (extrasize > 0)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+
/* Keep the STR_PTR on the top of the stack. */
if (bra == OP_BRAZERO)
{
@@ -6798,14 +7598,16 @@ else
/* AssertNot is successful. */
if (framesize < 0)
{
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ if (extrasize > 0)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
if (bra != OP_BRA)
{
if (extrasize == 2)
free_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
- else
+ else if (extrasize > 0)
free_stack(common, extrasize);
}
else
@@ -6867,7 +7669,9 @@ if (framesize < 0)
stacksize = needs_control_head ? 1 : 0;
if (ket != OP_KET || has_alternatives)
stacksize++;
- free_stack(common, stacksize);
+
+ if (stacksize > 0)
+ free_stack(common, stacksize);
}
if (needs_control_head)
@@ -7513,9 +8317,13 @@ while (*cc == OP_ALT)
cc += GET(cc, 1);
cc += 1 + LINK_SIZE;
-/* Temporarily encoding the needs_control_head in framesize. */
if (opcode == OP_ONCE)
+ {
+ /* We temporarily encode the needs_control_head in the lowest bit.
+ Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
+ the same value for small signed numbers (including negative numbers). */
BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
+ }
return cc + repeat_length;
}
@@ -7802,11 +8610,13 @@ count_match(common);
return cc + 1 + LINK_SIZE;
}
-static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
+static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
{
int class_len;
*opcode = *cc;
+*exact = 0;
+
if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
{
cc++;
@@ -7834,7 +8644,7 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
{
cc++;
*opcode -= OP_TYPESTAR - OP_STAR;
- *type = 0;
+ *type = OP_END;
}
else
{
@@ -7843,60 +8653,105 @@ else
cc++;
class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
*opcode = cc[class_len - 1];
+
if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
{
*opcode -= OP_CRSTAR - OP_STAR;
- if (end != NULL)
- *end = cc + class_len;
+ *end = cc + class_len;
+
+ if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
+ {
+ *exact = 1;
+ *opcode -= OP_PLUS - OP_STAR;
+ }
}
else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
{
*opcode -= OP_CRPOSSTAR - OP_POSSTAR;
- if (end != NULL)
- *end = cc + class_len;
+ *end = cc + class_len;
+
+ if (*opcode == OP_POSPLUS)
+ {
+ *exact = 1;
+ *opcode = OP_POSSTAR;
+ }
}
else
{
SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
*max = GET2(cc, (class_len + IMM2_SIZE));
- *min = GET2(cc, class_len);
+ *exact = GET2(cc, class_len);
- if (*min == 0)
+ if (*max == 0)
{
- SLJIT_ASSERT(*max != 0);
- *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
+ if (*opcode == OP_CRPOSRANGE)
+ *opcode = OP_POSSTAR;
+ else
+ *opcode -= OP_CRRANGE - OP_STAR;
}
- if (*max == *min)
- *opcode = OP_EXACT;
-
- if (end != NULL)
- *end = cc + class_len + 2 * IMM2_SIZE;
+ else
+ {
+ *max -= *exact;
+ if (*max == 0)
+ *opcode = OP_EXACT;
+ else if (*max == 1)
+ {
+ if (*opcode == OP_CRPOSRANGE)
+ *opcode = OP_POSQUERY;
+ else
+ *opcode -= OP_CRRANGE - OP_QUERY;
+ }
+ else
+ {
+ if (*opcode == OP_CRPOSRANGE)
+ *opcode = OP_POSUPTO;
+ else
+ *opcode -= OP_CRRANGE - OP_UPTO;
+ }
+ }
+ *end = cc + class_len + 2 * IMM2_SIZE;
}
return cc;
}
-if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
+switch(*opcode)
{
+ case OP_EXACT:
+ *exact = GET2(cc, 0);
+ cc += IMM2_SIZE;
+ break;
+
+ case OP_PLUS:
+ case OP_MINPLUS:
+ *exact = 1;
+ *opcode -= OP_PLUS - OP_STAR;
+ break;
+
+ case OP_POSPLUS:
+ *exact = 1;
+ *opcode = OP_POSSTAR;
+ break;
+
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_POSUPTO:
*max = GET2(cc, 0);
cc += IMM2_SIZE;
+ break;
}
-if (*type == 0)
+if (*type == OP_END)
{
*type = *cc;
- if (end != NULL)
- *end = next_opcode(common, cc);
+ *end = next_opcode(common, cc);
cc++;
return cc;
}
-if (end != NULL)
- {
- *end = cc + 1;
+*end = cc + 1;
#ifdef SUPPORT_UTF
- if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
+if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
- }
return cc;
}
@@ -7906,9 +8761,15 @@ DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
pcre_uchar type;
-int max = -1, min = -1;
+sljit_u32 max = 0, exact;
+BOOL fast_fail;
+sljit_s32 fast_str_ptr;
+BOOL charpos_enabled;
+pcre_uchar charpos_char;
+unsigned int charpos_othercasebit;
pcre_uchar *end;
-jump_list *nomatch = NULL;
+jump_list *no_match = NULL;
+jump_list *no_char1_match = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
@@ -7917,83 +8778,92 @@ int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int tmp_base, tmp_offset;
-PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
-cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
+fast_str_ptr = PRIVATE_DATA(cc + 1);
+fast_fail = TRUE;
-switch(type)
+SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
+
+if (cc == common->fast_forward_bc_ptr)
+ fast_fail = FALSE;
+else if (common->fast_fail_start_ptr == 0)
+ fast_str_ptr = 0;
+
+SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
+ || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
+
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
+
+if (type != OP_EXTUNI)
{
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
- case OP_ANYNL:
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_CLASS:
- case OP_NCLASS:
tmp_base = TMP3;
tmp_offset = 0;
- break;
-
- default:
- SLJIT_ASSERT_STOP();
- /* Fall through. */
-
- case OP_EXTUNI:
- case OP_XCLASS:
- case OP_NOTPROP:
- case OP_PROP:
+ }
+else
+ {
tmp_base = SLJIT_MEM1(SLJIT_SP);
tmp_offset = POSSESSIVE0;
- break;
}
+if (fast_fail && fast_str_ptr != 0)
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
+
+/* Handle fixed part first. */
+if (exact > 1)
+ {
+ SLJIT_ASSERT(fast_str_ptr == 0);
+ if (common->mode == JIT_COMPILE
+#ifdef SUPPORT_UTF
+ && !common->utf
+#endif
+ )
+ {
+ OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+ else
+ {
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+ }
+else if (exact == 1)
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
+
switch(opcode)
{
case OP_STAR:
- case OP_PLUS:
case OP_UPTO:
- case OP_CRRANGE:
+ SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
+
if (type == OP_ANYNL || type == OP_EXTUNI)
{
SLJIT_ASSERT(private_data_ptr == 0);
- if (opcode == OP_STAR || opcode == OP_UPTO)
- {
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
- }
- else
- {
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
- }
+ SLJIT_ASSERT(fast_str_ptr == 0);
+
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
- if (opcode == OP_UPTO || opcode == OP_CRRANGE)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
+ if (opcode == OP_UPTO)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
- if (opcode == OP_UPTO || opcode == OP_CRRANGE)
+ compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
+ if (opcode == OP_UPTO)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- if (opcode == OP_CRRANGE && min > 0)
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
- if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
- jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ jump = JUMP(SLJIT_ZERO);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
}
@@ -8006,134 +8876,268 @@ switch(opcode)
}
else
{
- if (opcode == OP_PLUS)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
- if (private_data_ptr == 0)
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (opcode <= OP_PLUS)
+ charpos_enabled = FALSE;
+ charpos_char = 0;
+ charpos_othercasebit = 0;
+
+ if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
+ {
+ charpos_enabled = TRUE;
+#ifdef SUPPORT_UTF
+ charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
+#endif
+ if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
+ {
+ charpos_othercasebit = char_get_othercase_bit(common, end + 1);
+ if (charpos_othercasebit == 0)
+ charpos_enabled = FALSE;
+ }
+
+ if (charpos_enabled)
+ {
+ charpos_char = end[1];
+ /* Consumpe the OP_CHAR opcode. */
+ end += 2;
+#if defined COMPILE_PCRE8
+ SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+ SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
+ if ((charpos_othercasebit & 0x100) != 0)
+ charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
+#endif
+ if (charpos_othercasebit != 0)
+ charpos_char |= charpos_othercasebit;
+
+ BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
+ BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
+ BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
+ }
+ }
+
+ if (charpos_enabled)
+ {
+ if (opcode == OP_UPTO)
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
+
+ /* Search the first instance of charpos_char. */
+ jump = JUMP(SLJIT_JUMP);
+ label = LABEL();
+ if (opcode == OP_UPTO)
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
+ }
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ JUMPHERE(jump);
+
+ detect_partial_match(common, &backtrack->topbacktracks);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ if (charpos_othercasebit != 0)
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
+
+ if (private_data_ptr == 0)
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
- else
- OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &nomatch);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (opcode <= OP_PLUS)
- JUMPTO(SLJIT_JUMP, label);
- else if (opcode == OP_CRRANGE && max == 0)
+ if (opcode == OP_UPTO)
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
+ }
+
+ /* Search the last instance of charpos_char. */
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ detect_partial_match(common, &no_match);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+ if (charpos_othercasebit != 0)
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
+ if (opcode == OP_STAR)
+ {
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ }
+ else
+ {
+ jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ JUMPHERE(jump);
+ }
+
+ if (opcode == OP_UPTO)
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+ else
+ JUMPTO(SLJIT_JUMP, label);
+
+ set_jumps(no_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ }
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ else if (common->utf)
{
- OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
- JUMPTO(SLJIT_JUMP, label);
+ if (private_data_ptr == 0)
+ allocate_stack(common, 2);
+
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
+
+ if (opcode == OP_UPTO)
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
+
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+
+ if (opcode == OP_UPTO)
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+ else
+ JUMPTO(SLJIT_JUMP, label);
+
+ set_jumps(no_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
}
+#endif
else
{
- OP1(SLJIT_MOV, TMP1, 0, base, offset1);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, base, offset1, TMP1, 0);
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
+ if (private_data_ptr == 0)
+ allocate_stack(common, 2);
+
+ OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
+ if (opcode == OP_UPTO)
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
+
+ label = LABEL();
+ detect_partial_match(common, &no_match);
+ compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
+ if (opcode == OP_UPTO)
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ }
+ else
+ JUMPTO(SLJIT_JUMP, label);
+
+ set_jumps(no_char1_match, LABEL());
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ set_jumps(no_match, LABEL());
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
}
- set_jumps(nomatch, LABEL());
- if (opcode == OP_CRRANGE)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
}
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_MINSTAR:
- case OP_MINPLUS:
- if (opcode == OP_MINPLUS)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
if (private_data_ptr == 0)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
break;
case OP_MINUPTO:
- case OP_CRMINRANGE:
+ SLJIT_ASSERT(fast_str_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
- if (opcode == OP_CRMINRANGE)
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_QUERY:
case OP_MINQUERY:
+ SLJIT_ASSERT(fast_str_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode == OP_QUERY)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_EXACT:
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
break;
case OP_POSSTAR:
- case OP_POSPLUS:
- case OP_POSUPTO:
- if (opcode == OP_POSPLUS)
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
- if (opcode == OP_POSUPTO)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &nomatch);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- if (opcode != OP_POSUPTO)
- JUMPTO(SLJIT_JUMP, label);
- else
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+ JUMPTO(SLJIT_JUMP, label);
+ set_jumps(no_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ break;
}
- set_jumps(nomatch, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
- break;
-
- case OP_POSQUERY:
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- compile_char1_matchingpath(common, type, cc, &nomatch);
- OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- set_jumps(nomatch, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+#endif
+ label = LABEL();
+ detect_partial_match(common, &no_match);
+ compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
+ JUMPTO(SLJIT_JUMP, label);
+ set_jumps(no_char1_match, LABEL());
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ set_jumps(no_match, LABEL());
+ if (fast_str_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
break;
- case OP_CRPOSRANGE:
- /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
+ case OP_POSUPTO:
+ SLJIT_ASSERT(fast_str_ptr == 0);
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ set_jumps(no_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
+ break;
+ }
+#endif
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ detect_partial_match(common, &no_match);
+ compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ set_jumps(no_char1_match, LABEL());
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ set_jumps(no_match, LABEL());
+ break;
- if (max != 0)
- {
- SLJIT_ASSERT(max - min > 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
- }
+ case OP_POSQUERY:
+ SLJIT_ASSERT(fast_str_ptr == 0);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &nomatch);
+ compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
- if (max == 0)
- JUMPTO(SLJIT_JUMP, label);
- else
- {
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- set_jumps(nomatch, LABEL());
+ set_jumps(no_match, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
break;
@@ -8174,9 +9178,9 @@ if (common->accept_label == NULL)
else
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
+OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
-OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
+OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
if (common->accept_label == NULL)
add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
else
@@ -8298,6 +9302,16 @@ while (cc < ccend)
case OP_SOM:
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_DOLL:
+ case OP_DOLLM:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_REVERSE:
+ cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ break;
+
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
@@ -8315,16 +9329,9 @@ while (cc < ccend)
case OP_NOT_VSPACE:
case OP_VSPACE:
case OP_EXTUNI:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
case OP_NOT:
case OP_NOTI:
- case OP_REVERSE:
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
case OP_SET_SOM:
@@ -8341,7 +9348,7 @@ while (cc < ccend)
if (common->mode == JIT_COMPILE)
cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
case OP_STAR:
@@ -8417,7 +9424,7 @@ while (cc < ccend)
if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
@@ -8425,7 +9432,7 @@ while (cc < ccend)
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
break;
#endif
@@ -8601,95 +9608,82 @@ DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
-int max = -1, min = -1;
+sljit_u32 max = 0, exact;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
+pcre_uchar *end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
-cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
switch(opcode)
{
case OP_STAR:
- case OP_PLUS:
case OP_UPTO:
- case OP_CRRANGE:
if (type == OP_ANYNL || type == OP_EXTUNI)
{
SLJIT_ASSERT(private_data_ptr == 0);
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
}
else
{
- if (opcode == OP_UPTO)
- min = 0;
- if (opcode <= OP_PLUS)
+ if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
{
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
+ OP1(SLJIT_MOV, TMP2, 0, base, offset1);
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+ jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
+ label = LABEL();
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
+ skip_char_back(common);
+ CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
}
else
{
- OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
- OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
+ jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
+ skip_char_back(common);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
}
- skip_char_back(common);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
- if (opcode == OP_CRRANGE)
- set_jumps(current->topbacktracks, LABEL());
JUMPHERE(jump);
if (private_data_ptr == 0)
free_stack(common, 2);
- if (opcode == OP_PLUS)
- set_jumps(current->topbacktracks, LABEL());
}
break;
case OP_MINSTAR:
- case OP_MINPLUS:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- compile_char1_matchingpath(common, type, cc, &jumplist);
+ compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
free_stack(common, 1);
- if (opcode == OP_MINPLUS)
- set_jumps(current->topbacktracks, LABEL());
break;
case OP_MINUPTO:
- case OP_CRMINRANGE:
- if (opcode == OP_CRMINRANGE)
- {
- label = LABEL();
- set_jumps(current->topbacktracks, label);
- }
+ OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- compile_char1_matchingpath(common, type, cc, &jumplist);
+ OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
- OP1(SLJIT_MOV, TMP1, 0, base, offset1);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
-
- if (opcode == OP_CRMINRANGE)
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
-
- if (opcode == OP_CRMINRANGE && max == 0)
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
- else
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
+ compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
@@ -8699,12 +9693,12 @@ switch(opcode)
case OP_QUERY:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
jump = JUMP(SLJIT_JUMP);
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
JUMPHERE(jump);
if (private_data_ptr == 0)
free_stack(common, 1);
@@ -8714,8 +9708,8 @@ switch(opcode)
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
- compile_char1_matchingpath(common, type, cc, &jumplist);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
JUMPHERE(jump);
if (private_data_ptr == 0)
@@ -8723,11 +9717,6 @@ switch(opcode)
break;
case OP_EXACT:
- case OP_POSPLUS:
- case OP_CRPOSRANGE:
- set_jumps(current->topbacktracks, LABEL());
- break;
-
case OP_POSSTAR:
case OP_POSQUERY:
case OP_POSUPTO:
@@ -8737,6 +9726,8 @@ switch(opcode)
SLJIT_ASSERT_STOP();
break;
}
+
+set_jumps(current->topbacktracks, LABEL());
}
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -8754,12 +9745,12 @@ if ((type & 0x1) == 0)
set_jumps(current->topbacktracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
return;
}
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
-CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
set_jumps(current->topbacktracks, LABEL());
free_stack(common, ref ? 2 : 3);
}
@@ -9258,7 +10249,9 @@ else if (opcode == OP_ONCE)
/* The STR_PTR must be released. */
stacksize++;
}
- free_stack(common, stacksize);
+
+ if (stacksize > 0)
+ free_stack(common, stacksize);
JUMPHERE(once);
/* Restore previous private_data_ptr */
@@ -9688,8 +10681,8 @@ common->currententry->entry = LABEL();
set_jumps(common->currententry->calls, common->currententry->entry);
sljit_emit_fast_enter(compiler, TMP2, 0);
-allocate_stack(common, private_data_size + framesize + alternativesize);
count_match(common);
+allocate_stack(common, private_data_size + framesize + alternativesize);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
if (needs_control_head)
@@ -9795,7 +10788,7 @@ struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
-const pcre_uint8 *tables = re->tables;
+const sljit_u8 *tables = re->tables;
pcre_study_data *study;
int private_data_size;
pcre_uchar *ccend;
@@ -9907,7 +10900,7 @@ ccend = bracketend(common->start);
/* Calculate the local space size on the stack. */
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
-common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
+common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
if (!common->optimized_cbracket)
return;
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
@@ -9942,15 +10935,10 @@ if (mode != JIT_COMPILE)
common->hit_start = common->ovector_start;
common->ovector_start += 2 * sizeof(sljit_sw);
}
- else
- {
- SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
- common->needs_start_ptr = TRUE;
- }
}
if ((re->options & PCRE_FIRSTLINE) != 0)
{
- common->first_line_end = common->ovector_start;
+ common->match_end_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
@@ -9961,14 +10949,12 @@ if (common->control_head_ptr != 0)
common->control_head_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}
-if (common->needs_start_ptr && common->has_set_som)
+if (common->has_set_som)
{
/* Saving the real start pointer is necessary. */
common->start_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}
-else
- common->needs_start_ptr = FALSE;
/* Aligning ovector to even number of sljit words. */
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
@@ -9985,16 +10971,24 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
total_length = ccend - common->start;
-common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
+common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
if (!common->private_data_ptrs)
{
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
return;
}
-memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
+memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
+if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
+ {
+ if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
+ detect_fast_fail(common, common->start, &private_data_size, 4);
+ }
+
+SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
+
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
{
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
@@ -10004,7 +10998,7 @@ if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
if (common->has_then)
{
- common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
+ common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
memset(common->then_offsets, 0, total_length);
set_then_offsets(common, common->start, NULL);
}
@@ -10031,12 +11025,15 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
-OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
+OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
+if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
+ reset_fast_fail(common);
+
if (mode == JIT_PARTIAL_SOFT_COMPILE)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
@@ -10047,19 +11044,19 @@ if (common->control_head_ptr != 0)
/* Main part of the matching */
if ((re->options & PCRE_ANCHORED) == 0)
{
- mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+ mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
continue_match_label = LABEL();
/* Forward search if possible. */
if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
{
- if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
+ if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
;
else if ((re->flags & PCRE_FIRSTSET) != 0)
- fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+ fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
else if ((re->flags & PCRE_STARTLINE) != 0)
- fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
+ fast_forward_newline(common);
else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
- fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
+ fast_forward_start_bits(common, study->start_bits);
}
}
else
@@ -10080,14 +11077,11 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
+if (common->fast_forward_bc_ptr != NULL)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
-if (common->needs_start_ptr)
- {
- SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
+if (common->start_ptr != OVECTOR(0))
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
- }
-else
- SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
/* Copy the beginning of the string. */
if (mode == JIT_PARTIAL_SOFT_COMPILE)
@@ -10166,11 +11160,12 @@ if (mode == JIT_PARTIAL_SOFT_COMPILE)
/* Check we have remaining characters. */
if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ SLJIT_ASSERT(common->match_end_ptr != 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
}
-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
+ (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
if ((re->options & PCRE_ANCHORED) == 0)
{
@@ -10181,12 +11176,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
/* There cannot be more newlines here. */
}
else
- {
- if ((re->options & PCRE_FIRSTLINE) == 0)
- CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
- else
- CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
- }
+ CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
}
/* No more remaining characters. */
@@ -10205,15 +11195,18 @@ if (common->might_be_empty)
{
JUMPHERE(empty_match);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
}
+common->fast_forward_bc_ptr = NULL;
+common->fast_fail_start_ptr = 0;
+common->fast_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_exit = TRUE;
quit_label = common->quit_label;
@@ -10395,7 +11388,7 @@ union {
void *executable_func;
jit_function call_executable_func;
} convert_executable_func;
-pcre_uint8 local_space[MACHINE_STACK_SIZE];
+sljit_u8 local_space[MACHINE_STACK_SIZE];
struct sljit_stack local_stack;
local_stack.top = (sljit_sw)&local_space;
@@ -10435,7 +11428,7 @@ arguments.begin = subject;
arguments.end = subject + length;
arguments.mark_ptr = NULL;
/* JIT decreases this value less frequently than the interpreter. */
-arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
arguments.limit_match = functions->limit_match;
arguments.notbol = (options & PCRE_NOTBOL) != 0;
@@ -10528,7 +11521,7 @@ arguments.begin = subject_ptr;
arguments.end = subject_ptr + length;
arguments.mark_ptr = NULL;
/* JIT decreases this value less frequently than the interpreter. */
-arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
arguments.limit_match = functions->limit_match;
arguments.notbol = (options & PCRE_NOTBOL) != 0;