diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regexec.c')
-rw-r--r-- | ext/mbstring/oniguruma/regexec.c | 2935 |
1 files changed, 0 insertions, 2935 deletions
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c deleted file mode 100644 index b7319ac4fb..0000000000 --- a/ext/mbstring/oniguruma/regexec.c +++ /dev/null @@ -1,2935 +0,0 @@ -/********************************************************************** - - regexec.c - Oniguruma (regular expression library) - - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) - -**********************************************************************/ -#include "regint.h" - -static UChar* -get_right_adjust_char_head_with_prev(RegCharEncoding code, - UChar* start, UChar* s, UChar** prev); -static UChar* -step_backward_char(RegCharEncoding code, UChar* start, UChar* s, int n); - - -extern void -regex_region_clear(RegRegion* region) -{ - int i; - - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = REG_REGION_NOTPOS; - } -} - -extern int -regex_region_resize(RegRegion* region, int n) -{ - int i; - - region->num_regs = n; - - if (n < REG_NREGION) - n = REG_NREGION; - - if (region->allocated == 0) { - region->beg = (int* )xmalloc(n * sizeof(int)); - region->end = (int* )xmalloc(n * sizeof(int)); - - if (region->beg == 0 || region->end == 0) - return REGERR_MEMORY; - - region->allocated = n; - } - else if (region->allocated < n) { - region->beg = (int* )xrealloc(region->beg, n * sizeof(int)); - region->end = (int* )xrealloc(region->end, n * sizeof(int)); - - if (region->beg == 0 || region->end == 0) - return REGERR_MEMORY; - - region->allocated = n; - } - - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = REG_REGION_NOTPOS; - } - return 0; -} - -static void -regex_region_init(RegRegion* region) -{ - region->num_regs = 0; - region->allocated = 0; - region->beg = (int* )0; - region->end = (int* )0; -} - -extern RegRegion* -regex_region_new() -{ - RegRegion* r; - - r = (RegRegion* )xmalloc(sizeof(RegRegion)); - regex_region_init(r); - return r; -} - -extern void -regex_region_free(RegRegion* r, int free_self) -{ - if (r) { - if (r->allocated > 0) { - if (r->beg) xfree(r->beg); - if (r->end) xfree(r->end); - r->allocated = 0; - } - if (free_self) xfree(r); - } -} - -extern void -regex_region_copy(RegRegion* to, RegRegion* from) -{ -#define RREGC_SIZE (sizeof(int) * from->num_regs) - int i; - - if (to == from) return; - - if (to->allocated == 0) { - if (from->num_regs > 0) { - to->beg = (int* )xmalloc(RREGC_SIZE); - to->end = (int* )xmalloc(RREGC_SIZE); - to->allocated = from->num_regs; - } - } - else if (to->allocated < from->num_regs) { - to->beg = (int* )xrealloc(to->beg, RREGC_SIZE); - to->end = (int* )xrealloc(to->end, RREGC_SIZE); - to->allocated = from->num_regs; - } - - for (i = 0; i < from->num_regs; i++) { - to->beg[i] = from->beg[i]; - to->end[i] = from->end[i]; - } - to->num_regs = from->num_regs; -} - - -/** stack **/ -#define INVALID_STACK_INDEX -1 -typedef int StackIndex; - -typedef struct _StackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr */ - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - StackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ - StackIndex start; /* prev. info (for backtrack "(...)*" ) */ - StackIndex end; /* prev. info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } null_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} StackType; - -/* stack type */ -/* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0003 -#define STK_POS_NOT 0x0005 -/* avoided by normal-POP, but value should be small */ -#define STK_NULL_CHECK_START 0x0100 -/* handled by normal-POP */ -#define STK_MEM_START 0x0200 -#define STK_MEM_END 0x0300 -#define STK_REPEAT_INC 0x0400 -/* avoided by normal-POP */ -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_MEM_END_MARK 0x0a00 -#define STK_VOID 0x0b00 /* for fill a blank */ - -/* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define IS_TO_VOID_TARGET(stk) \ - (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START) - -typedef struct { - void* stack_p; - int stack_n; - RegOptionType options; - RegRegion* region; - UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -} MatchArg; - -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ -} while (0) - -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) - - -#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ - if (msa->stack_p) {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ - stk_alloc = (StackType* )(msa->stack_p);\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + msa->stack_n;\ - }\ - else {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ - + sizeof(StackType) * (stack_num));\ - stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + (stack_num);\ - }\ -} while(0) - -#define STACK_SAVE do{\ - if (stk_base != stk_alloc) {\ - msa->stack_p = stk_base;\ - msa->stack_n = stk_end - stk_base;\ - };\ -} while(0) - -static int -stack_double(StackType** arg_stk_base, StackType** arg_stk_end, - StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) -{ - int n; - StackType *x, *stk_base, *stk_end, *stk; - - stk_base = *arg_stk_base; - stk_end = *arg_stk_end; - stk = *arg_stk; - - n = stk_end - stk_base; - if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (StackType* )xmalloc(sizeof(StackType) * n * 2); - if (IS_NULL(x)) { - STACK_SAVE; - return REGERR_MEMORY; - } - xmemcpy(x, stk_base, n * sizeof(StackType)); - n *= 2; - } - else { - n *= 2; - if (n > MATCH_STACK_LIMIT_SIZE) return REGERR_MATCH_STACK_LIMIT_OVER; - x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); - if (IS_NULL(x)) { - STACK_SAVE; - return REGERR_MEMORY; - } - } - *arg_stk = x + (stk - stk_base); - *arg_stk_base = x; - *arg_stk_end = x + n; - return 0; -} - -#define STACK_ENSURE(n) do {\ - if (stk_end - stk < (n)) {\ - int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ - if (r != 0) { STACK_SAVE; return r; } \ - }\ -} while(0) - -#define STACK_AT(index) (stk_base + (index)) -#define GET_STACK_INDEX(stk) ((stk) - stk_base) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_TYPE(stack_type) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) - -#define STACK_PUSH_REPEAT(id, pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT;\ - stk->u.repeat.num = (id);\ - stk->u.repeat.pcode = (pat);\ - stk->u.repeat.count = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_REPEAT_INC(sindex) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT_INC;\ - stk->u.repeat_inc.si = (sindex);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_START(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_START;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END_MARK(mnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END_MARK;\ - stk->u.mem.num = (mnum);\ - STACK_INC;\ -} while(0) - -#define STACK_GET_MEM_START(mnum, k) do {\ - int level = 0;\ - k = stk;\ - while (k > stk_base) {\ - k--;\ - if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \ - && k->u.mem.num == (mnum)) {\ - level++;\ - }\ - else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) break;\ - level--;\ - }\ - }\ -} while (0) - -#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_START;\ - stk->u.null_check.num = (cnum);\ - stk->u.null_check.pstr = (s);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_CALL_FRAME(pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_CALL_FRAME;\ - stk->u.call_frame.ret_addr = (pat);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_RETURN do {\ - STACK_ENSURE(1);\ - stk->type = STK_RETURN;\ - STACK_INC;\ -} while(0) - - -#ifdef REG_DEBUG -#define STACK_BASE_CHECK(p) \ - if ((p) < stk_base) goto stack_error; -#else -#define STACK_BASE_CHECK(p) -#endif - -#define STACK_POP_ONE do {\ - stk--;\ - STACK_BASE_CHECK(stk); \ -} while(0) - -#define STACK_POP do {\ - switch (pop_level) {\ - case STACK_POP_LEVEL_FREE:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - }\ - break;\ - case STACK_POP_LEVEL_MEM_START:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - }\ - break;\ - default:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - }\ - break;\ - }\ -} while(0) - -#define STACK_POP_TIL_POS_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk); \ - if (stk->type == STK_POS_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - }\ -} while(0) - -#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk); \ - if (stk->type == STK_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - }\ -} while(0) - -#define STACK_POS_END(k) do {\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_POS) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_STOP_BT_END do {\ - StackType *k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_STOP_BT) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK(isnull,id,s) do {\ - StackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - }\ - }\ -} while(0) - -#define STACK_GET_REPEAT(id, k) do {\ - int level = 0;\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k); \ - if (k->type == STK_REPEAT) {\ - if (level == 0) {\ - if (k->u.repeat.num == (id)) {\ - break;\ - }\ - }\ - }\ - else if (k->type == STK_CALL_FRAME) level--;\ - else if (k->type == STK_RETURN) level++;\ - }\ -} while (0) - -#define STACK_RETURN(addr) do {\ - int level = 0;\ - StackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k); \ - if (k->type == STK_CALL_FRAME) {\ - if (level == 0) {\ - (addr) = k->u.call_frame.ret_addr;\ - break;\ - }\ - else level--;\ - }\ - else if (k->type == STK_RETURN)\ - level++;\ - }\ -} while(0) - - -#define CASETABLE_TOLOWER(c) (casetable[c]) - -/* byte_code is already converted to lower-case at string compile time */ -#define SBTRANSCMP(byte_code,c) (byte_code == CASETABLE_TOLOWER(c)) - -#define STRING_CMP(s1,s2,len) do {\ - if (ignore_case) {\ - int slen; \ - while (len > 0) {\ - slen = mblen(encode, *s1); \ - if (slen == 1) {\ - if (CASETABLE_TOLOWER(*s1) != CASETABLE_TOLOWER(*s2)) \ - goto fail;\ - s1++; s2++; len--; \ - } \ - else {\ - len -= slen; \ - while (slen-- > 0) { \ - if (*s1++ != *s2++) goto fail;\ - } \ - }\ - }\ - }\ - else {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) goto fail;\ - }\ - }\ -} while(0) - -#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ - is_fail = 0;\ - if (ignore_case) {\ - int slen; \ - while (len > 0) {\ - slen = mblen(encode, *s1); \ - if (slen == 1) {\ - if (CASETABLE_TOLOWER(*s1) != CASETABLE_TOLOWER(*s2)) {\ - is_fail = 1; break;\ - }\ - s1++; s2++; len--; \ - } \ - else {\ - len -= slen; \ - while (slen-- > 0) { \ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ - } \ - if (is_fail != 0) break;\ - }\ - }\ - }\ - else {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ - }\ - }\ -} while(0) - -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#define IS_EMPTY_STR (str == end) - -#define DATA_ENSURE(n) \ - if (s + (n) > end) goto fail - -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) - -#ifdef REG_DEBUG_STATISTICS - -#define USE_TIMEOFDAY - -#ifdef USE_TIMEOFDAY -#ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -static struct timeval ts, te; -#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) -#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ - (((te).tv_sec - (ts).tv_sec)*1000000)) -#else -#ifdef HAVE_SYS_TIMES_H -#include <sys/times.h> -#endif -static struct tms ts, te; -#define GETTIME(t) times(&(t)) -#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) -#endif - -static int OpCounter[256]; -static int OpPrevCounter[256]; -static unsigned long OpTime[256]; -static int OpCurr = OP_FINISH; -static int OpPrevTarget = OP_FAIL; -static int MaxStackDepth = 0; - -#define STAT_OP_IN(opcode) do {\ - if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ - OpCurr = opcode;\ - OpCounter[opcode]++;\ - GETTIME(ts);\ -} while (0) - -#define STAT_OP_OUT do {\ - GETTIME(te);\ - OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while (0) - -extern void regex_statistics_init() -{ - int i; - for (i = 0; i < 256; i++) { - OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; - } - MaxStackDepth = 0; - -#ifdef RUBY_PLATFORM - rb_define_global_function("regex_stat_print", regex_stat_print, 0); -#endif -} - -#ifdef RUBY_PLATFORM -static VALUE regex_stat_print() -{ - regex_print_statistics(stderr); - return Qnil; -} -#endif - -extern void -regex_print_statistics(FILE* f) -{ - int i; - fprintf(f, " count prev time\n"); - for (i = 0; RegOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], RegOpInfo[i].name); - } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); -} - -#define STACK_INC do {\ - stk++;\ - if (stk - stk_base > MaxStackDepth) \ - MaxStackDepth = stk - stk_base;\ -} while (0) - -#else -#define STACK_INC stk++ - -#define STAT_OP_IN(opcode) -#define STAT_OP_OUT -#endif - -extern int -regex_is_in_wc_range(UChar* p, WCINT wc) -{ - WCINT n, *data; - int low, high, x; - - GET_WCINT(n, p); - data = (WCINT* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (wc > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && wc >= data[low * 2]) ? 1 : 0); -} - - -/* matching region of POSIX API */ -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} posix_regmatch_t; - -/* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. */ -static int -match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, - UChar* sprev, MatchArg* msa) -{ - static UChar FinishCode[] = { OP_FINISH }; - - int i, n, num_mem, best_len, pop_level, find_cond; - LengthType tlen, tlen2; - MemNumType mem; - RelAddrType addr; - RegOptionType option = reg->options; - RegCharEncoding encode = reg->enc; - unsigned char* casetable = DefaultTransTable; - int ignore_case; - UChar *s, *q, *sbegin; - UChar *p = reg->p; - char *alloca_base; - StackType *stk_alloc, *stk_base, *stk, *stk_end; - StackType *stkp; /* used as any purpose. */ - StackIndex *repeat_stk; - StackIndex *mem_start_stk, *mem_end_stk; - n = reg->num_repeat + reg->num_mem * 2; - - STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); - ignore_case = IS_IGNORECASE(option); - find_cond = IS_FIND_CONDITION(option); - pop_level = reg->stack_pop_level; - num_mem = reg->num_mem; - repeat_stk = (StackIndex* )alloca_base; - - mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; - mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ - mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ - for (i = 1; i <= num_mem; i++) { - mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; - } - -#ifdef REG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", - (int )str, (int )end, (int )sstart, (int )sprev); - fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); -#endif - - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ - best_len = REG_MISMATCH; - s = sstart; - while (1) { -#ifdef REG_DEBUG_MATCH - { - UChar *q, *bp, buf[50]; - int len; - fprintf(stderr, "%4d> \"", (int )(s - str)); - bp = buf; - for (i = 0, q = s; i < 7 && q < end; i++) { - len = mblen(encode, *q); - while (len-- > 0) *bp++ = *q++; - } - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } - else { xmemcpy(bp, "\"", 1); bp += 1; } - *bp = 0; - fputs(buf, stderr); - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - regex_print_compiled_byte_code(stderr, p, NULL); - fprintf(stderr, "\n"); - } -#endif - - sbegin = s; - switch (*p++) { - case OP_END: STAT_OP_IN(OP_END); - n = s - sstart; - if (n > best_len) { - RegRegion* region = msa->region; - best_len = n; - if (region) { - if (IS_POSIX_REGION(msa->options)) { - posix_regmatch_t* rmt = (posix_regmatch_t* )region; - - rmt[0].rm_so = sstart - str; - rmt[0].rm_eo = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->backtrack_mem, i)) - rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; - - rmt[i].rm_eo = (find_cond != 0 - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - rmt[i].rm_so = rmt[i].rm_eo = REG_REGION_NOTPOS; - } - } - } - else { - region->beg[0] = sstart - str; - region->end[0] = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->backtrack_mem, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - - region->end[i] = (find_cond != 0 - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - region->beg[i] = region->end[i] = REG_REGION_NOTPOS; - } - } - } - } - } - STAT_OP_OUT; - - if (find_cond) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = REG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && s < end) { - goto fail; /* for retry */ - } - } - else { - /* default behavior: return first-matching result. */ - goto finish; - } - break; - - case OP_EXACT1: STAT_OP_IN(OP_EXACT1); -#if 0 - DATA_ENSURE(1); - if (*p != *s) goto fail; - p++; s++; -#endif - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; - STAT_OP_OUT; - break; - - case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); - if (! SBTRANSCMP(*p, *s)) goto fail; - DATA_ENSURE(1); - p++; s++; - STAT_OP_OUT; - break; - - case OP_EXACT2: STAT_OP_IN(OP_EXACT2); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - STAT_OP_OUT; - continue; - break; - - case OP_EXACT3: STAT_OP_IN(OP_EXACT3); - DATA_ENSURE(3); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - STAT_OP_OUT; - continue; - break; - - case OP_EXACT4: STAT_OP_IN(OP_EXACT4); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - STAT_OP_OUT; - continue; - break; - - case OP_EXACT5: STAT_OP_IN(OP_EXACT5); - DATA_ENSURE(5); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTN: STAT_OP_IN(OP_EXACTN); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen); - while (tlen-- > 0) { - if (*p++ != *s++) goto fail; - } - sprev = s - 1; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen); - while (tlen-- > 0) { - if (! SBTRANSCMP(*p, *s)) goto fail; - p++; s++; - } - sprev = s - 1; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - STAT_OP_OUT; - break; - - case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); - DATA_ENSURE(6); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 2); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 2; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 3); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 3; - STAT_OP_OUT; - continue; - break; - - case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); - GET_LENGTH_INC(tlen, p); /* mb-len */ - GET_LENGTH_INC(tlen2, p); /* string len */ - tlen2 *= tlen; - DATA_ENSURE(tlen2); - while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - tlen; - STAT_OP_OUT; - continue; - break; - - case OP_CCLASS: STAT_OP_IN(OP_CCLASS); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; - p += SIZE_BITSET; - s += mblen(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */ - STAT_OP_OUT; - break; - - case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); - if (! ismb(encode, *s)) goto fail; - - cclass_mb: - GET_LENGTH_INC(tlen, p); - { - WCINT wc; - UChar *ss; - int mb_len = mblen(encode, *s); - - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - wc = MB2WC(ss, s, encode); - -#ifdef UNALIGNED_WORD_ACCESS - if (! regex_is_in_wc_range(p, wc)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (! regex_is_in_wc_range(q, wc)) goto fail; -#endif - } - p += tlen; - STAT_OP_OUT; - break; - - case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); - DATA_ENSURE(1); - if (ismb(encode, *s)) { - p += SIZE_BITSET; - goto cclass_mb; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - STAT_OP_OUT; - break; - - case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; - p += SIZE_BITSET; - s += mblen(encode, *s); - STAT_OP_OUT; - break; - - case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); - if (! ismb(encode, *s)) { - DATA_ENSURE(1); - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; - } - - cclass_mb_not: - GET_LENGTH_INC(tlen, p); - { - WCINT wc; - UChar *ss; - int mb_len = mblen(encode, *s); - - if (s + mb_len > end) { - s = end; - p += tlen; - goto cc_mb_not_success; - } - - ss = s; - s += mb_len; - wc = MB2WC(ss, s, encode); - -#ifdef UNALIGNED_WORD_ACCESS - if (regex_is_in_wc_range(p, wc)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (regex_is_in_wc_range(q, wc)) goto fail; -#endif - } - p += tlen; - - cc_mb_not_success: - STAT_OP_OUT; - break; - - case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); - DATA_ENSURE(1); - if (ismb(encode, *s)) { - p += SIZE_BITSET; - goto cclass_mb_not; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - STAT_OP_OUT; - break; - - case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); - DATA_ENSURE(1); - if (ismb(encode, *s)) { - n = mblen(encode, *s); - DATA_ENSURE(n); - s += n; - } - else { - if (! IS_MULTILINE(option)) { - if (IS_NEWLINE(*s)) goto fail; - } - s++; - } - STAT_OP_OUT; - break; - - case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); - if (! IS_MULTILINE(option)) { - while (s < end) { - STACK_PUSH_ALT(p, s, sprev); - if (ismb(encode, *s)) { - n = mblen(encode, *s); - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - if (IS_NEWLINE(*s)) goto fail; - sprev = s; - s++; - } - } - } - else { - while (s < end) { - STACK_PUSH_ALT(p, s, sprev); - if (ismb(encode, *s)) { - n = mblen(encode, *s); - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - } - STAT_OP_OUT; - break; - - case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); - while (s < end) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - if (ismb(encode, *s)) { - n = mblen(encode, *s); - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - if (! IS_MULTILINE(option)) { - if (IS_NEWLINE(*s)) goto fail; - } - sprev = s; - s++; - } - } - p++; - STAT_OP_OUT; - break; - - case OP_WORD: STAT_OP_IN(OP_WORD); - DATA_ENSURE(1); - if (! IS_WORD_STR_INC(encode, s, end)) - goto fail; - STAT_OP_OUT; - break; - - case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); - DATA_ENSURE(1); - if (IS_WORD_STR_INC(encode, s, end)) - goto fail; - STAT_OP_OUT; - break; - -#ifdef USE_SBMB_CLASS - case OP_WORD_SB: STAT_OP_IN(OP_WORD_SB); - DATA_ENSURE(1); - if (! IS_SB_WORD(encode, *s)) - goto fail; - s++; - STAT_OP_OUT; - break; - - case OP_WORD_MB: STAT_OP_IN(OP_WORD_MB); - DATA_ENSURE(1); - if (! IS_MB_WORD(encode, *s)) - goto fail; - - n = mblen(encode, *s); - DATA_ENSURE(n); - s += n; - STAT_OP_OUT; - break; -#endif - - case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! IS_WORD_STR(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (! IS_WORD_STR(encode, sprev, end)) - goto fail; - } - else { - if (IS_WORD_STR(encode, s, end) == IS_WORD_STR(encode, sprev, end)) - goto fail; - } - STAT_OP_OUT; - continue; - break; - - case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK(1) && IS_WORD_STR(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (IS_WORD_STR(encode, sprev, end)) - goto fail; - } - else { - if (IS_WORD_STR(encode, s, end) != IS_WORD_STR(encode, sprev, end)) - goto fail; - } - STAT_OP_OUT; - continue; - break; - -#ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK(1) && IS_WORD_STR(encode, s, end)) { - if (ON_STR_BEGIN(s) || !IS_WORD_STR(encode, sprev, end)) { - STAT_OP_OUT; - continue; - } - } - goto fail; - break; - - case OP_WORD_END: STAT_OP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && IS_WORD_STR(encode, sprev, end)) { - if (ON_STR_END(s) || !IS_WORD_STR(encode, s, end)) { - STAT_OP_OUT; - continue; - } - } - goto fail; - break; -#endif - - case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); - if (! ON_STR_BEGIN(s)) goto fail; - - STAT_OP_OUT; - continue; - break; - - case OP_END_BUF: STAT_OP_IN(OP_END_BUF); - if (! ON_STR_END(s)) goto fail; - - STAT_OP_OUT; - continue; - break; - - case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); - if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - STAT_OP_OUT; - continue; - } - else if (IS_NEWLINE(*sprev) && !ON_STR_END(s)) { - STAT_OP_OUT; - continue; - } - goto fail; - break; - - case OP_END_LINE: STAT_OP_IN(OP_END_LINE); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !IS_NEWLINE(*sprev)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - STAT_OP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (IS_NEWLINE(*s)) { - STAT_OP_OUT; - continue; - } - goto fail; - break; - - case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !IS_NEWLINE(*sprev)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ - STAT_OP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - if (IS_NEWLINE(*s) && ON_STR_END(s+1)) { - STAT_OP_OUT; - continue; - } - goto fail; - break; - - case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); - if (s != msa->start) - goto fail; - - STAT_OP_OUT; - continue; - break; - - case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_START(mem, s); - STAT_OP_OUT; - continue; - break; - - case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); - GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (StackIndex )((void* )s); - STAT_OP_OUT; - continue; - break; - - case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_END(mem, s); - STAT_OP_OUT; - continue; - break; - - case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (StackIndex )((void* )s); - STAT_OP_OUT; - continue; - break; - -#ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); - GET_MEMNUM_INC(mem, p); - STACK_GET_MEM_START(mem, stkp); - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - STACK_PUSH_MEM_END(mem, s); - STAT_OP_OUT; - continue; - break; - - case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (StackIndex )((void* )s); - STACK_GET_MEM_START(mem, stkp); - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - STACK_PUSH_MEM_END_MARK(mem); - STAT_OP_OUT; - continue; - break; -#endif - - case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); - mem = 1; - goto backref; - break; - - case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); - mem = 2; - goto backref; - break; - - case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3); - mem = 3; - goto backref; - break; - - case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); - GET_MEMNUM_INC(mem, p); - backref: - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->backtrack_mem, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (find_cond != 0 - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = mblen(encode, *sprev)) < s) - sprev += len; - - STAT_OP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->backtrack_mem, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (find_cond != 0 - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = mblen(encode, *sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - STAT_OP_OUT; - continue; - } - break; - - case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); - GET_OPTION_INC(option, p); - ignore_case = IS_IGNORECASE(option); - STACK_PUSH_ALT(p, s, sprev); - p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - STAT_OP_OUT; - continue; - break; - - case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); - GET_OPTION_INC(option, p); - ignore_case = IS_IGNORECASE(option); - STAT_OP_OUT; - continue; - break; - - case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START); - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_PUSH_NULL_CHECK_START(mem, s); - STAT_OP_OUT; - continue; - break; - - case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { -#ifdef REG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } - } - STAT_OP_OUT; - continue; - break; - - case OP_JUMP: STAT_OP_IN(OP_JUMP); - GET_RELADDR_INC(addr, p); - p += addr; - STAT_OP_OUT; - continue; - break; - - case OP_PUSH: STAT_OP_IN(OP_PUSH); - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; - continue; - break; - - case OP_POP: STAT_OP_IN(OP_POP); - STACK_POP_ONE; - STAT_OP_OUT; - continue; - break; - - case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1); - GET_RELADDR_INC(addr, p); - if (*p == *s && DATA_ENSURE_CHECK(1)) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; - continue; - } - p += (addr + 1); - STAT_OP_OUT; - continue; - break; - - case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT); - GET_RELADDR_INC(addr, p); - if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; - continue; - } - p++; - STAT_OP_OUT; - continue; - break; - - case OP_REPEAT: STAT_OP_IN(OP_REPEAT); - { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); - } - } - STAT_OP_OUT; - continue; - break; - - case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG); - { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p, s, sprev); - p += addr; - } - } - STAT_OP_OUT; - continue; - break; - - case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); - { - StackIndex si; - - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ -#ifdef USE_SUBEXP_CALL - if (reg->num_call > 0) { - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - } - else { - si = repeat_stk[mem]; - stkp = STACK_AT(si); - } -#else - si = repeat_stk[mem]; - stkp = STACK_AT(si); -#endif - stkp->u.repeat.count++; - if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ - } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(p, s, sprev); - p = stkp->u.repeat.pcode; - } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); - } - STAT_OP_OUT; - continue; - break; - - case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); - { - StackIndex si; - - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ -#ifdef USE_SUBEXP_CALL - if (reg->num_call > 0) { - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - } - else { - si = repeat_stk[mem]; - stkp = STACK_AT(si); - } -#else - si = repeat_stk[mem]; - stkp = STACK_AT(si); -#endif - stkp->u.repeat.count++; - if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ - } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(stkp->u.repeat.pcode, s, sprev); - } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); - } - STAT_OP_OUT; - continue; - break; - - case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); - STACK_PUSH_POS(s, sprev); - STAT_OP_OUT; - continue; - break; - - case OP_POP_POS: STAT_OP_IN(OP_POP_POS); - { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; - } - STAT_OP_OUT; - continue; - break; - - case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT); - GET_RELADDR_INC(addr, p); - STACK_PUSH_POS_NOT(p + addr, s, sprev); - STAT_OP_OUT; - continue; - break; - - case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS); - STACK_POP_TIL_POS_NOT; - goto fail; - break; - - case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BT; - STAT_OP_OUT; - continue; - break; - - case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT); - STACK_STOP_BT_END; - STAT_OP_OUT; - continue; - break; - - case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); - GET_LENGTH_INC(tlen, p); - s = MBBACK(encode, str, s, (int )tlen); - if (IS_NULL(s)) goto fail; - sprev = regex_get_prev_char_head(encode, str, s); - STAT_OP_OUT; - continue; - break; - - case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); - GET_RELADDR_INC(addr, p); - GET_LENGTH_INC(tlen, p); - q = MBBACK(encode, str, s, (int )tlen); - if (IS_NULL(q)) { - /* too short case -> success. ex. /(?<!XXX)a/.match("a") - If you want to change to fail, replace following line. */ - p += addr; - /* goto fail; */ - } - else { - STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); - s = q; - sprev = regex_get_prev_char_head(encode, str, s); - } - STAT_OP_OUT; - continue; - break; - - case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT); - STACK_POP_TIL_LOOK_BEHIND_NOT; - goto fail; - break; - -#ifdef USE_SUBEXP_CALL - case OP_CALL: STAT_OP_IN(OP_CALL); - GET_ABSADDR_INC(addr, p); - STACK_PUSH_CALL_FRAME(p); - p = reg->p + addr; - STAT_OP_OUT; - continue; - break; - - case OP_RETURN: STAT_OP_IN(OP_RETURN); - STACK_RETURN(p); - STACK_PUSH_RETURN; - STAT_OP_OUT; - continue; - break; -#endif - - case OP_FINISH: - goto finish; - break; - - fail: - STAT_OP_OUT; - /* fall */ - case OP_FAIL: STAT_OP_IN(OP_FAIL); - STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; - STAT_OP_OUT; - continue; - break; - - default: - goto bytecode_error; - - } /* end of switch */ - sprev = sbegin; - } /* end of while(1) */ - - finish: - STACK_SAVE; - return best_len; - -#ifdef REG_DEBUG - stack_error: - STACK_SAVE; - return REGERR_STACK_BUG; -#endif - - bytecode_error: - STACK_SAVE; - return REGERR_UNDEFINED_BYTECODE; - - unexpected_bytecode_error: - STACK_SAVE; - return REGERR_UNEXPECTED_BYTECODE; -} - - -UChar* DefaultTransTable = (UChar* )0; - -#ifndef REG_RUBY_M17N -static const char SJIS_FOLLOW_TABLE[SINGLE_BYTE_SIZE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 -}; - -#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) -#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) -#define sjis_ismbfirst(c) ismb(REGCODE_SJIS, (c)) -#define sjis_ismbtrail(c) SJIS_FOLLOW_TABLE[(c)] - -extern WCINT -regex_mb2wc(UChar* p, UChar* end, RegCharEncoding code) -{ - int c, i, len; - WCINT n; - - if (code == REGCODE_UTF8) { - c = *p++; - len = mblen(code,c); - if (len > 1) { - len--; - n = c & ((1 << (6 - len)) - 1); - while (len--) { - c = *p++; - n = (n << 6) | (c & ((1 << 6) - 1)); - } - } - else - n = c; - } - else { - c = *p++; - len = mblen(code,c); - n = c; - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - } - return n; -} -#endif /* REG_RUBY_M17N */ - -extern UChar* -regex_get_left_adjust_char_head(RegCharEncoding code, UChar* start, UChar* s) -{ - UChar *p; - int len; - - if (s <= start) return s; - p = s; - -#ifdef REG_RUBY_M17N - while (!m17n_islead(code, *p) && p > start) p--; - while (p + (len = mblen(code, *p)) < s) { - p += len; - } - if (p + len == s) return s; - return p; -#else - - if (code == REGCODE_ASCII) { - return p; - } - else if (code == REGCODE_EUCJP) { - while (!eucjp_islead(*p) && p > start) p--; - len = mblen(code, *p); - if (p + len > s) return p; - p += len; - return p + ((s - p) & ~1); - } - else if (code == REGCODE_SJIS) { - if (sjis_ismbtrail(*p)) { - while (p > start) { - if (! sjis_ismbfirst(*--p)) { - p++; - break; - } - } - } - len = mblen(code, *p); - if (p + len > s) return p; - p += len; - return p + ((s - p) & ~1); - } - else { /* REGCODE_UTF8 */ - while (!utf8_islead(*p) && p > start) p--; - return p; - } -#endif /* REG_RUBY_M17N */ -} - -extern UChar* -regex_get_right_adjust_char_head(RegCharEncoding code, UChar* start, UChar* s) -{ - UChar* p = regex_get_left_adjust_char_head(code, start, s); - - if (p < s) { - p += mblen(code, *p); - } - return p; -} - -static UChar* -get_right_adjust_char_head_with_prev(RegCharEncoding code, - UChar* start, UChar* s, UChar** prev) -{ - UChar* p = regex_get_left_adjust_char_head(code, start, s); - - if (p < s) { - if (prev) *prev = p; - p += mblen(code, *p); - } - else { - if (prev) *prev = (UChar* )NULL; /* Sorry */ - } - return p; -} - -extern UChar* -regex_get_prev_char_head(RegCharEncoding code, UChar* start, UChar* s) -{ - if (s <= start) - return (UChar* )NULL; - - return regex_get_left_adjust_char_head(code, start, s - 1); -} - -static UChar* -step_backward_char(RegCharEncoding code, UChar* start, UChar* s, int n) -{ - while (IS_NOT_NULL(s) && n-- > 0) { - if (s <= start) - return (UChar* )NULL; - - s = regex_get_left_adjust_char_head(code, start, s - 1); - } - return s; -} - -static UChar* -slow_search(RegCharEncoding code, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) -{ - UChar *t, *p, *s, *end; - - end = text_end - (target_end - target) + 1; - if (end > text_range) - end = text_range; - - s = text; - - while (s < end) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s += mblen(code, *s); - } - - return (UChar* )NULL; -} - -static int -str_trans_match_after_head_byte(RegCharEncoding code, - int len, UChar* t, UChar* tend, UChar* p) -{ - while (--len > 0) { - if (*t != *p) break; - t++; p++; - } - - if (len == 0) { - while (t < tend) { - len = mblen(code, *p); - if (len == 1) { - if (*t != TOLOWER(code, *p)) - break; - p++; - t++; - } - else { - if (*t != *p++) break; - t++; - while (--len > 0) { - if (*t != *p) break; - t++; p++; - } - if (len > 0) break; - } - } - if (t == tend) - return 1; - } - - return 0; -} - -static UChar* -slow_search_ic(RegCharEncoding code, - UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) -{ - int len; - UChar *t, *p, *s, *end; - - end = text_end - (target_end - target) + 1; - if (end > text_range) - end = text_range; - - s = text; - - while (s < end) { - len = mblen(code, *s); - if (*s == *target || (len == 1 && TOLOWER(code, *s) == *target)) { - p = s + 1; - t = target + 1; - if (str_trans_match_after_head_byte(code, len, t, target_end, p)) - return s; - } - s += len; - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward(RegCharEncoding code, UChar* target, UChar* target_end, - UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) -{ - UChar *t, *p, *s; - - s = text_end - (target_end - target); - if (s > text_start) - s = text_start; - else - s = regex_get_left_adjust_char_head(code, adjust_text, s); - - while (s >= text) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s = regex_get_prev_char_head(code, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward_ic(RegCharEncoding code, - UChar* target,UChar* target_end, - UChar* text, UChar* adjust_text, - UChar* text_end, UChar* text_start) -{ - int len; - UChar *t, *p, *s; - - s = text_end - (target_end - target); - if (s > text_start) - s = text_start; - else - s = regex_get_left_adjust_char_head(code, adjust_text, s); - - while (s >= text) { - len = mblen(code, *s); - if (*s == *target || (len == 1 && TOLOWER(code, *s) == *target)) { - p = s + 1; - t = target + 1; - if (str_trans_match_after_head_byte(code, len, t, target_end, p)) - return s; - } - s = regex_get_prev_char_head(code, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) -{ - UChar *s, *t, *p, *end; - UChar *tail; - int skip; - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text; - while ((s - text) < target_end - target) { - s += mblen(reg->enc, *s); - } - s--; /* set to text check tail position. */ - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (t >= target && *p == *t) { - p--; t--; - } - if (t < target) return p + 1; - - skip = reg->map[*s]; - p++; - t = p; - while ((p - t) < skip) { - p += mblen(reg->enc, *p); - } - s += (p - t); - } - } - else { - while (s < end) { - p = s; - t = tail; - while (t >= target && *p == *t) { - p--; t--; - } - if (t < target) return p + 1; - - skip = reg->int_map[*s]; - p++; - t = p; - while ((p - t) < skip) { - p += mblen(reg->enc, *p); - } - s += (p - t); - } - } - return (UChar* )NULL; -} - -static UChar* -bm_search(regex_t* reg, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) -{ - UChar *s, *t, *p, *end; - UChar *tail; - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (t >= target && *p == *t) { - p--; t--; - } - if (t < target) return p + 1; - s += reg->map[*s]; - } - } - else { /* see int_map[] */ - while (s < end) { - p = s; - t = tail; - while (t >= target && *p == *t) { - p--; t--; - } - if (t < target) return p + 1; - s += reg->int_map[*s]; - } - } - return (UChar* )NULL; -} - -static int -set_bm_backward_skip(UChar* s, UChar* end, RegCharEncoding enc, - int ignore_case, int** skip) -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * REG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return REGERR_MEMORY; - } - - len = end - s; - for (i = 0; i < REG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - if (ignore_case) { - for (i = len - 1; i > 0; i--) - (*skip)[TOLOWER(enc, s[i])] = i; - } - else { - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - } - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, - UChar* adjust_text, UChar* text_end, UChar* text_start) -{ - UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = regex_get_left_adjust_char_head(reg->enc, adjust_text, s); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return s; - - s -= reg->int_map_backward[*s]; - s = regex_get_left_adjust_char_head(reg->enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -map_search(RegCharEncoding code, UChar map[], UChar* text, UChar* text_range) -{ - UChar *s = text; - - while (s < text_range) { - if (map[*s]) return s; - - s += mblen(code, *s); - } - return (UChar* )NULL; -} - -static UChar* -map_search_backward(RegCharEncoding code, UChar map[], - UChar* text, UChar* adjust_text, UChar* text_start) -{ - UChar *s = text_start; - - while (s >= text) { - if (map[*s]) return s; - - s = regex_get_prev_char_head(code, adjust_text, s); - } - return (UChar* )NULL; -} - -extern int -regex_match(regex_t* reg, UChar* str, UChar* end, UChar* at, RegRegion* region, - RegOptionType option) -{ - int r; - UChar *prev; - MatchArg msa; - - MATCH_ARG_INIT(msa, option, region, at); - - if (region && !IS_POSIX_REGION(option)) - r = regex_region_resize(region, reg->num_mem + 1); - else - r = 0; - - if (r == 0) { - prev = regex_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, at, prev, &msa); - } - MATCH_ARG_FREE(msa); - return r; -} - -static int -forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) -{ - UChar *p, *pprev = (UChar* )NULL; - -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", - (int )str, (int )end, (int )s, (int )range); -#endif - - p = s; - if (reg->dmin > 0) { - if (IS_SINGLEBYTE_CODE(reg->enc)) { - p += reg->dmin; - } - else { - UChar *q = p + reg->dmin; - while (p < q) p += mblen(reg->enc, *p); - } - } - - retry: - switch (reg->optimize) { - case REG_OPTIMIZE_EXACT: - p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); - break; - case REG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range); - break; - - case REG_OPTIMIZE_EXACT_BM: - p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case REG_OPTIMIZE_EXACT_BM_NOT_REV: - p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case REG_OPTIMIZE_MAP: - p = map_search(reg->enc, reg->map, p, range); - break; - } - - if (p && p < range) { - if (p - reg->dmin < s) { - retry_gate: - pprev = p; - p += mblen(reg->enc, *p); - goto retry; - } - - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = regex_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (!IS_NEWLINE(*prev)) - goto retry_gate; - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { - prev = regex_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (prev && IS_NEWLINE(*prev)) - goto retry_gate; - } - else if (!IS_NEWLINE(*p)) - goto retry_gate; - break; - } - } - - if (reg->dmax == 0) { - *low = p; - if (low_prev) { - if (*low > s) - *low_prev = regex_get_prev_char_head(reg->enc, s, p); - else - *low_prev = regex_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - } - } - else { - if (reg->dmax != INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = get_right_adjust_char_head_with_prev(reg->enc, s, - *low, low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = regex_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); - } - else { - if (low_prev) - *low_prev = regex_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); - } - } - } - /* no needs to adjust *high, *high is used as range check only */ - *high = p - reg->dmin; - -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", - (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); -#endif - return 1; /* success */ - } - - return 0; /* fail */ -} - -static int set_bm_backward_skip P_((UChar* s, UChar* end, RegCharEncoding enc, - int ignore_case, int** skip)); - -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 - -static int -backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, - UChar* range, UChar* adjrange, UChar** low, UChar** high) -{ - int r; - UChar *p; - - range += reg->dmin; - p = s; - - retry: - switch (reg->optimize) { - case REG_OPTIMIZE_EXACT: - exact_method: - p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case REG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->exact, - reg->exact_end, range, adjrange, end, p); - break; - - case REG_OPTIMIZE_EXACT_BM: - case REG_OPTIMIZE_EXACT_BM_NOT_REV: - if (IS_NULL(reg->int_map_backward)) { - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 0, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); - break; - - case REG_OPTIMIZE_MAP: - p = map_search_backward(reg->enc, reg->map, range, adjrange, p); - break; - } - - if (p) { - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = regex_get_prev_char_head(reg->enc, adjrange, p); - if (!IS_NEWLINE(*prev)) { - p = prev; - goto retry; - } - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { - prev = regex_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (IS_NEWLINE(*prev)) { - p = prev; - goto retry; - } - } - else if (!IS_NEWLINE(*p)) { - p = regex_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; - } - } - - /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != INFINITE_DISTANCE) { - *low = p - reg->dmax; - *high = p - reg->dmin; - *high = regex_get_right_adjust_char_head(reg->enc, adjrange, *high); - } - -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); -#endif - return 1; /* success */ - } - - fail: -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: fail.\n"); -#endif - return 0; /* fail */ -} - - -extern int -regex_search(regex_t* reg, UChar* str, UChar* end, - UChar* start, UChar* range, RegRegion* region, RegOptionType option) -{ - int r; - UChar *s, *prev; - MatchArg msa; - - if (REG_STATE(reg) == REG_STATE_NORMAL) { - reg->state++; /* increment as search counter */ - if (IS_NOT_NULL(reg->chain)) { - regex_chain_reduce(reg); - reg->state++; - } - } - else { - int n = 0; - while (REG_STATE(reg) < REG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return REGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - reg->state++; /* increment as search counter */ - } - -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "regex_search (entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - if (region && !IS_POSIX_REGION(option)) { - r = regex_region_resize(region, reg->num_mem + 1); - if (r) goto finish_no_msa; - } - - if (start > end || start < str) goto mismatch_no_msa; - -#define MATCH_AND_RETURN_CHECK \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != REG_MISMATCH) {\ - if (r >= 0) goto match;\ - goto finish; /* error */ \ - } - - /* anchor optimize: resume search range */ - if (reg->anchor != 0 && str < end) { - UChar* semi_end; - - if (reg->anchor & ANCHOR_BEGIN_POSITION) { - /* search start-position only */ - begin_position: - if (range > start) - range = start + 1; - else - range = start; - } - else if (reg->anchor & ANCHOR_BEGIN_BUF) { - /* search str-position only */ - if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; - } - else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_END_BUF) { - semi_end = end; - - end_buf: - if (semi_end - str < reg->anchor_dmin) - goto mismatch_no_msa; - - if (range > start) { - if (semi_end - start > reg->anchor_dmax) { - start = semi_end - reg->anchor_dmax; - if (start < end) - start = regex_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = regex_get_prev_char_head(reg->enc, str, end); - } - } - if (semi_end - (range - 1) < reg->anchor_dmin) { - range = semi_end - reg->anchor_dmin + 1; - } - - if (start >= range) goto mismatch_no_msa; - } - else { - if (semi_end - range > reg->anchor_dmax) { - range = semi_end - reg->anchor_dmax; - } - if (semi_end - start < reg->anchor_dmin) { - start = semi_end - reg->anchor_dmin; - start = regex_get_left_adjust_char_head(reg->enc, str, start); - if (range > start) goto mismatch_no_msa; - } - } - } - else if (reg->anchor & ANCHOR_SEMI_END_BUF) { - if (IS_NEWLINE(end[-1])) { - semi_end = end - 1; - if (semi_end > str && start <= semi_end) { - goto end_buf; - } - } - else { - semi_end = end; - goto end_buf; - } - } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) { - goto begin_position; - } - } - else if (str == end) { /* empty string */ - static UChar* address_for_empty_string = ""; - -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "regex_search: empty string.\n"); -#endif - - if (reg->threshold_len == 0) { - s = start = end = str = address_for_empty_string; - prev = (UChar* )NULL; - - MATCH_ARG_INIT(msa, option, region, start); - MATCH_AND_RETURN_CHECK; - goto mismatch; - } - goto mismatch_no_msa; - } - -#ifdef REG_DEBUG_SEARCH - fprintf(stderr, "regex_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - MATCH_ARG_INIT(msa, option, region, start); - - s = start; - if (range > start) { /* forward search */ - if (s > str) - prev = regex_get_prev_char_head(reg->enc, str, s); - else - prev = (UChar* )NULL; - - if (reg->optimize != REG_OPTIMIZE_NONE) { - UChar *sch_range, *low, *high, *low_prev; - - sch_range = range; - if (reg->dmax != 0) { - if (reg->dmax == INFINITE_DISTANCE) - sch_range = end; - else { - sch_range += reg->dmax; - if (sch_range > end) sch_range = end; - } - } - if (reg->dmax != INFINITE_DISTANCE && - (end - start) >= reg->threshold_len) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK; - prev = s; - s += mblen(reg->enc, *s); - } - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { - if (IS_NOT_NULL(prev)) { - while (!IS_NEWLINE(*prev) && s < range) { - prev = s; - s += mblen(reg->enc, *s); - } - } - } - } while (s < range); - goto mismatch; - } - else { /* check only. */ - if ((end - start) < reg->threshold_len || - ! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; - } - } - - do { - MATCH_AND_RETURN_CHECK; - prev = s; - s += mblen(reg->enc, *s); - } while (s <= range); /* exec s == range, because empty match with /$/. */ - } - else { /* backward search */ - if (reg->optimize != REG_OPTIMIZE_NONE) { - UChar *low, *high, *adjrange, *sch_start; - - adjrange = regex_get_left_adjust_char_head(reg->enc, str, range); - if (reg->dmax != INFINITE_DISTANCE && - (end - range) >= reg->threshold_len) { - do { - sch_start = s + reg->dmax; - if (sch_start > end) sch_start = end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = regex_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK; - s = prev; - } - } while (s >= range); - goto mismatch; - } - else { /* check only. */ - if ((end - range) < reg->threshold_len) goto mismatch; - - sch_start = s; - if (reg->dmax != 0) { - if (reg->dmax == INFINITE_DISTANCE) - sch_start = end; - else { - sch_start += reg->dmax; - if (sch_start > end) sch_start = end; - else - sch_start = regex_get_left_adjust_char_head(reg->enc, start, - sch_start); - } - } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; - } - } - - do { - prev = regex_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK; - s = prev; - } while (s >= range); - } - - mismatch: - r = REG_MISMATCH; - - finish: - MATCH_ARG_FREE(msa); - reg->state--; /* decrement as search counter */ - - /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). */ - if (IS_FIND_NOT_EMPTY(reg->options) && region && !IS_POSIX_REGION(option)) - regex_region_clear(region); - -#ifdef REG_DEBUG - if (r != REG_MISMATCH) - fprintf(stderr, "regex_search: error %d\n", r); -#endif - return r; - - mismatch_no_msa: - r = REG_MISMATCH; - finish_no_msa: - reg->state--; /* decrement as search counter */ -#ifdef REG_DEBUG - if (r != REG_MISMATCH) - fprintf(stderr, "regex_search: error %d\n", r); -#endif - return r; - - match: - reg->state--; /* decrement as search counter */ - MATCH_ARG_FREE(msa); - return s - str; -} - -extern const char* -regex_version(void) -{ -#define MSTR(a) # a - - return (MSTR(ONIGURUMA_VERSION_MAJOR) "." - MSTR(ONIGURUMA_VERSION_MINOR) "." - MSTR(ONIGURUMA_VERSION_TEENY)); -} |