diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regexec.c')
-rw-r--r-- | ext/mbstring/oniguruma/regexec.c | 998 |
1 files changed, 426 insertions, 572 deletions
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 918aa67aa8..7430d78514 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,10 +29,12 @@ #include "regint.h" +#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + #ifdef USE_CRNL_AS_LINE_TERMINATOR #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ - ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end)) + ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif #ifdef USE_CAPTURE_HISTORY @@ -111,7 +113,7 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) (OnigCaptureTreeNode** )xrealloc(parent->childs, sizeof(OnigCaptureTreeNode*) * n); } - CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(parent->childs); for (i = parent->allocated; i < n; i++) { parent->childs[i] = (OnigCaptureTreeNode* )0; } @@ -196,7 +198,7 @@ onig_region_resize(OnigRegion* region, int n) return 0; } -extern int +static int onig_region_resize_clear(OnigRegion* region, int n) { int r; @@ -297,47 +299,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ #define INVALID_STACK_INDEX -1 -typedef long StackIndex; - -typedef struct _StackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - StackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ - StackIndex start; /* prev. info (for backtrack "(...)*" ) */ - StackIndex end; /* prev. info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } null_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} StackType; /* stack type */ /* used by normal-POP */ @@ -365,22 +326,6 @@ typedef struct _StackType { #define STK_MASK_TO_VOID_TARGET 0x10ff #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ -typedef struct { - void* stack_p; - int stack_n; - OnigOptionType options; - OnigRegion* region; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} MatchArg; - #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ @@ -388,14 +333,14 @@ typedef struct { (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).best_len = ONIG_MISMATCH;\ -} while (0) +} while(0) #else #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ -} while (0) +} while(0) #endif #ifdef USE_COMBINATION_EXPLOSION_CHECK @@ -424,14 +369,14 @@ typedef struct { (msa).state_check_buff = (void* )0;\ (msa).state_check_buff_size = 0;\ }\ -} while (0) + } while(0) #define MATCH_ARG_FREE(msa) do {\ if ((msa).stack_p) xfree((msa).stack_p);\ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ if ((msa).state_check_buff) xfree((msa).state_check_buff);\ }\ -} while (0); +} while(0) #else #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) @@ -442,15 +387,15 @@ typedef struct { #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ if (msa->stack_p) {\ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ - stk_alloc = (StackType* )(msa->stack_p);\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ stk_base = stk_alloc;\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ else {\ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ - + sizeof(StackType) * (stack_num));\ - stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + + sizeof(OnigStackType) * (stack_num));\ + stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ stk_base = stk_alloc;\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ @@ -480,11 +425,11 @@ onig_set_match_stack_limit_size(unsigned int size) } static int -stack_double(StackType** arg_stk_base, StackType** arg_stk_end, - StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) +stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) { unsigned int n; - StackType *x, *stk_base, *stk_end, *stk; + OnigStackType *x, *stk_base, *stk_end, *stk; stk_base = *arg_stk_base; stk_end = *arg_stk_end; @@ -492,12 +437,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, n = stk_end - stk_base; if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (StackType* )xmalloc(sizeof(StackType) * n * 2); + x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); if (IS_NULL(x)) { STACK_SAVE; return ONIGERR_MEMORY; } - xmemcpy(x, stk_base, n * sizeof(StackType)); + xmemcpy(x, stk_base, n * sizeof(OnigStackType)); n *= 2; } else { @@ -508,7 +453,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, else n = MatchStackLimitSize; } - x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); + x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); if (IS_NULL(x)) { STACK_SAVE; return ONIGERR_MEMORY; @@ -680,7 +625,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, level--;\ }\ }\ -} while (0) +} while(0) #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ int level = 0;\ @@ -698,7 +643,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ k++;\ }\ -} while (0) +} while(0) #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ @@ -844,7 +789,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) #define STACK_STOP_BT_END do {\ - StackType *k = stk;\ + OnigStackType *k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ @@ -859,7 +804,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) #define STACK_NULL_CHECK(isnull,id,s) do {\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ @@ -874,7 +819,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_NULL_CHECK_REC(isnull,id,s) do {\ int level = 0;\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ @@ -894,7 +839,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } while(0) #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ @@ -934,7 +879,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ int level = 0;\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ @@ -996,11 +941,11 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, else if (k->type == STK_CALL_FRAME) level--;\ else if (k->type == STK_RETURN) level++;\ }\ -} while (0) +} while(0) #define STACK_RETURN(addr) do {\ int level = 0;\ - StackType* k = stk;\ + OnigStackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_RETURN"); \ @@ -1023,25 +968,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ } while(0) -#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\ - if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ +#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ goto fail; \ } while(0) -static int string_cmp_ic(OnigEncoding enc, int ambig_flag, +static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, UChar* s1, UChar** ps2, int mblen) { - UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN]; - UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN]; - UChar *p1, *p2, *end, *s2, *end2; + UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p1, *p2, *end1, *s2, *end2; int len1, len2; s2 = *ps2; - end = s1 + mblen; + end1 = s1 + mblen; end2 = s2 + mblen; - while (s1 < end) { - len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1); - len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2); + while (s1 < end1) { + len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1); + len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2); if (len1 != len2) return 0; p1 = buf1; p2 = buf2; @@ -1065,31 +1010,36 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag, }\ } while(0) -#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\ - if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ +#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ is_fail = 1; \ else \ is_fail = 0; \ } while(0) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#define IS_EMPTY_STR (str == end) - -#define DATA_ENSURE(n) \ - if (s + (n) > end) goto fail - +#define IS_EMPTY_STR (str == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define DATA_ENSURE_CHECK1 (s < right_range) +#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#else +#define DATA_ENSURE_CHECK1 (s < end) #define DATA_ENSURE_CHECK(n) (s + (n) <= end) +#define DATA_ENSURE(n) if (s + (n) > end) goto fail +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + #ifdef USE_CAPTURE_HISTORY static int -make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, - StackType* stk_top, UChar* str, regex_t* reg) +make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, + OnigStackType* stk_top, UChar* str, regex_t* reg) { int n, r; OnigCaptureTreeNode* child; - StackType* k = *kp; + OnigStackType* k = *kp; while (k < stk_top) { if (k->type == STK_MEM_START) { @@ -1097,7 +1047,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && BIT_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); - CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(child); child->group = n; child->beg = (int )(k->u.mem.pstr - str); r = history_tree_add_child(node, child); @@ -1124,7 +1074,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } #endif -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL static int mem_is_in_memp(int mem, int num, UChar* memp) { int i; @@ -1138,13 +1088,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp) } static int backref_match_at_nested_level(regex_t* reg - , StackType* top, StackType* stk_base - , int ignore_case, int ambig_flag + , OnigStackType* top, OnigStackType* stk_base + , int ignore_case, int case_fold_flag , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; - StackType* k; + OnigStackType* k; level = 0; k = top; @@ -1166,7 +1116,7 @@ static int backref_match_at_nested_level(regex_t* reg ss = *s; if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, ambig_flag, + if (string_cmp_ic(reg->enc, case_fold_flag, pstart, &ss, (int )(pend - pstart)) == 0) return 0; /* or goto next_mem; */ } @@ -1192,70 +1142,8 @@ static int backref_match_at_nested_level(regex_t* reg return 0; } -#endif /* USE_BACKREF_AT_LEVEL */ - - -#ifdef RUBY_PLATFORM - -typedef struct { - int state; - regex_t* reg; - MatchArg* msa; - StackType* stk_base; -} TrapEnsureArg; - -static VALUE -trap_ensure(VALUE arg) -{ - TrapEnsureArg* ta = (TrapEnsureArg* )arg; - - if (ta->state == 0) { /* trap_exec() is not normal return */ - ONIG_STATE_DEC_THREAD(ta->reg); - if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) - xfree(ta->stk_base); - - MATCH_ARG_FREE(*(ta->msa)); - } - - return Qnil; -} +#endif /* USE_BACKREF_WITH_LEVEL */ -static VALUE -trap_exec(VALUE arg) -{ - TrapEnsureArg* ta; - - rb_trap_exec(); - - ta = (TrapEnsureArg* )arg; - ta->state = 1; /* normal return */ - return Qnil; -} - -extern void -onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base) -{ - VALUE arg; - TrapEnsureArg ta; - - ta.state = 0; - ta.reg = reg; - ta.msa = msa; - ta.stk_base = stk_base; - arg = (VALUE )(&ta); - rb_ensure(trap_exec, arg, trap_ensure, arg); -} - -#define CHECK_INTERRUPT_IN_MATCH_AT do {\ - if (rb_trap_pending) {\ - if (! rb_prohibit_interrupt) {\ - onig_exec_trap(reg, msa, stk_base);\ - }\ - }\ -} while (0) -#else -#define CHECK_INTERRUPT_IN_MATCH_AT -#endif /* RUBY_PLATFORM */ #ifdef ONIG_DEBUG_STATISTICS @@ -1288,41 +1176,26 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define STAT_OP_IN(opcode) do {\ +#define MOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ -} while (0) +} while(0) -#define STAT_OP_OUT do {\ +#define MOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while (0) - -#ifdef RUBY_PLATFORM - -/* - * :nodoc: - */ -static VALUE onig_stat_print(void) -{ - onig_print_statistics(stderr); - return Qnil; -} -#endif +} while(0) -extern void onig_statistics_init(void) +extern void +onig_statistics_init(void) { int i; for (i = 0; i < 256; i++) { OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; } MaxStackDepth = 0; - -#ifdef RUBY_PLATFORM - rb_define_global_function("onig_stat_print", onig_stat_print, 0); -#endif } extern void @@ -1341,73 +1214,15 @@ onig_print_statistics(FILE* f) stk++;\ if (stk - stk_base > MaxStackDepth) \ MaxStackDepth = stk - stk_base;\ -} while (0) +} while(0) #else #define STACK_INC stk++ -#define STAT_OP_IN(opcode) -#define STAT_OP_OUT +#define MOP_IN(opcode) +#define MOP_OUT #endif -extern int -onig_is_in_code_range(const UChar* p, OnigCodePoint code) -{ - OnigCodePoint n, *data; - OnigCodePoint low, high, x; - - GET_CODE_POINT(n, p); - data = (OnigCodePoint* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (code > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && code >= data[low * 2]) ? 1 : 0); -} - -static int -is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); - } - - if (IS_CCLASS_NOT(cc)) - return !found; - else - return found; -} - -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int len; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - len = 2; - } - else { - len = ONIGENC_CODE_TO_MBCLEN(enc, code); - } - return is_code_in_cc(len, code, cc); -} - /* matching region of POSIX API */ typedef int regoff_t; @@ -1420,8 +1235,11 @@ typedef struct { /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. */ static int -match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, - UChar* sprev, MatchArg* msa) +match_at(regex_t* reg, const UChar* str, const UChar* end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar* right_range, +#endif + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1431,15 +1249,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, RelAddrType addr; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; - OnigAmbigType ambig_flag = reg->ambig_flag; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; UChar *s, *q, *sbegin; UChar *p = reg->p; char *alloca_base; - StackType *stk_alloc, *stk_base, *stk, *stk_end; - StackType *stkp; /* used as any purpose. */ - StackIndex si; - StackIndex *repeat_stk; - StackIndex *mem_start_stk, *mem_end_stk; + OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stkp; /* used as any purpose. */ + OnigStackIndex si; + OnigStackIndex *repeat_stk; + OnigStackIndex *mem_start_stk, *mem_end_stk; #ifdef USE_COMBINATION_EXPLOSION_CHECK int scv; unsigned char* state_check_buff = msa->state_check_buff; @@ -1450,9 +1268,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); pop_level = reg->stack_pop_level; num_mem = reg->num_mem; - repeat_stk = (StackIndex* )alloca_base; + repeat_stk = (OnigStackIndex* )alloca_base; - mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat); + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); mem_end_stk = mem_start_stk + num_mem; mem_start_stk--; /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */ @@ -1480,13 +1298,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enc_len(encode, q); + len = enclen(encode, q); while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } else { xmemcpy(bp, "\"", 1); bp += 1; } *bp = 0; - fputs(buf, stderr); + fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); onig_print_compiled_byte_code(stderr, p, NULL, encode); fprintf(stderr, "\n"); @@ -1495,7 +1313,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, sbegin = s; switch (*p++) { - case OP_END: STAT_OP_IN(OP_END); + case OP_END: MOP_IN(OP_END); n = s - sstart; if (n > best_len) { OnigRegion* region; @@ -1512,7 +1330,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, best_len = n; region = msa->region; if (region) { -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; @@ -1535,7 +1353,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } else { -#endif /* USE_POSIX_REGION_OPTION */ +#endif /* USE_POSIX_API_REGION_OPTION */ region->beg[0] = sstart - str; region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { @@ -1561,7 +1379,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (IS_NULL(region->history_root)) { region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(node); } else { node = region->history_root; @@ -1581,7 +1399,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION } /* else IS_POSIX_REGION() */ #endif } /* if (region) */ @@ -1590,14 +1408,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - STAT_OP_OUT; + MOP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { best_len = ONIG_MISMATCH; goto fail; /* for retry */ } - if (IS_FIND_LONGEST(option) && s < end) { + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { goto fail; /* for retry */ } } @@ -1606,7 +1424,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto finish; break; - case OP_EXACT1: STAT_OP_IN(OP_EXACT1); + case OP_EXACT1: MOP_IN(OP_EXACT1); #if 0 DATA_ENSURE(1); if (*p != *s) goto fail; @@ -1615,19 +1433,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s++) goto fail; DATA_ENSURE(0); p++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); { int len; - UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; DATA_ENSURE(1); - ss = s; - sp = p; - - len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { @@ -1637,21 +1455,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; q++; } } - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACT2: STAT_OP_IN(OP_EXACT2); + case OP_EXACT2: MOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT3: STAT_OP_IN(OP_EXACT3); + case OP_EXACT3: MOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1660,11 +1478,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT4: STAT_OP_IN(OP_EXACT4); + case OP_EXACT4: MOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1675,11 +1493,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACT5: STAT_OP_IN(OP_EXACT5); + case OP_EXACT5: MOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1692,25 +1510,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, if (*p != *s) goto fail; sprev = s; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTN: STAT_OP_IN(OP_EXACTN); + case OP_EXACTN: MOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); { int len; - UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; @@ -1718,35 +1536,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, while (p < endp) { sprev = s; DATA_ENSURE(1); - ss = s; - sp = p; - - len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) { - goto fail; - } + if (*p != *q) goto fail; p++; q++; } } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N1: STAT_OP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_EXACTMB2N2: STAT_OP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1757,11 +1573,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N3: STAT_OP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1776,11 +1592,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; if (*p != *s) goto fail; p++; s++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB2N: STAT_OP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -1790,11 +1606,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; } sprev = s - 2; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMB3N: STAT_OP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -1806,11 +1622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; } sprev = s - 3; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_EXACTMBN: STAT_OP_IN(OP_EXACTMBN); + case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -1820,19 +1636,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p++; s++; } sprev = s - tlen; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_CCLASS: STAT_OP_IN(OP_CCLASS); + case OP_CCLASS: MOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - STAT_OP_OUT; + s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ + MOP_OUT; break; - case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -1843,7 +1659,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, int mb_len; DATA_ENSURE(1); - mb_len = enc_len(encode, s); + mb_len = enclen(encode, s); DATA_ENSURE(mb_len); ss = s; s += mb_len; @@ -1858,10 +1674,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, #endif } p += tlen; - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1876,18 +1692,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += tlen; s++; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_NOT: STAT_OP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s); - STAT_OP_OUT; + s += enclen(encode, s); + MOP_OUT; break; - case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -1901,9 +1717,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, { OnigCodePoint code; UChar *ss; - int mb_len = enc_len(encode, s); + int mb_len = enclen(encode, s); - if (s + mb_len > end) { + if (! DATA_ENSURE_CHECK(mb_len)) { DATA_ENSURE(1); s = (UChar* )end; p += tlen; @@ -1925,10 +1741,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += tlen; cc_mb_not_success: - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1943,10 +1759,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += tlen; s++; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; @@ -1955,49 +1771,49 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, DATA_ENSURE(1); GET_POINTER_INC(node, p); - mb_len = enc_len(encode, s); + mb_len = enclen(encode, s); ss = s; s += mb_len; DATA_ENSURE(0); code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (is_code_in_cc(mb_len, code, node) == 0) goto fail; + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); + case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); s += n; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); - while (s < end) { + case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR); - while (s < end) { + case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s); + n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -2008,31 +1824,31 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, s++; } } - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); - while (s < end) { + case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } p++; - STAT_OP_OUT; + MOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); - while (s < end) { + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s); - if (n >1) { + n = enclen(encode, s); + if (n > 1) { DATA_ENSURE(n); sprev = s; s += n; @@ -2043,36 +1859,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } p++; - STAT_OP_OUT; + MOP_OUT; break; #ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); GET_STATE_CHECK_NUM_INC(mem, p); - while (s < end) { + while (DATA_ENSURE_CHECK1) { STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } - STAT_OP_OUT; + MOP_OUT; break; case OP_STATE_CHECK_ANYCHAR_ML_STAR: - STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); GET_STATE_CHECK_NUM_INC(mem, p); - while (s < end) { + while (DATA_ENSURE_CHECK1) { STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enc_len(encode, s); + n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -2083,29 +1899,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, s++; } } - STAT_OP_OUT; + MOP_OUT; break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_WORD: STAT_OP_IN(OP_WORD); + case OP_WORD: MOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s); - STAT_OP_OUT; + s += enclen(encode, s); + MOP_OUT; break; - case OP_NOT_WORD: STAT_OP_IN(OP_NOT_WORD); + case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s); - STAT_OP_OUT; + s += enclen(encode, s); + MOP_OUT; break; - case OP_WORD_BOUND: STAT_OP_IN(OP_WORD_BOUND); + case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); if (ON_STR_BEGIN(s)) { DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) @@ -2120,13 +1936,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, == ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND); + case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; } else if (ON_STR_END(s)) { @@ -2138,25 +1954,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, != ONIGENC_IS_MBC_WORD(encode, sprev, end)) goto fail; } - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) { + case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } } goto fail; break; - case OP_WORD_END: STAT_OP_IN(OP_WORD_END); + case OP_WORD_END: MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } } @@ -2164,80 +1980,81 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, break; #endif - case OP_BEGIN_BUF: STAT_OP_IN(OP_BEGIN_BUF); + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_END_BUF: STAT_OP_IN(OP_END_BUF); + case OP_END_BUF: MOP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_BEGIN_LINE: STAT_OP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - STAT_OP_OUT; + MOP_OUT; continue; } goto fail; break; - case OP_END_LINE: STAT_OP_IN(OP_END_LINE); + case OP_END_LINE: MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - STAT_OP_OUT; + MOP_OUT; continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ - STAT_OP_OUT; + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enc_len(encode, s))) { - STAT_OP_OUT; + ON_STR_END(s + enclen(encode, s))) { + MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - UChar* ss = s + enc_len(encode, s); - if (ON_STR_END(ss + enc_len(encode, ss))) { - STAT_OP_OUT; + UChar* ss = s + enclen(encode, s); + ss += enclen(encode, ss); + if (ON_STR_END(ss)) { + MOP_OUT; continue; } } @@ -2245,79 +2062,79 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto fail; break; - case OP_BEGIN_POSITION: STAT_OP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); if (s != msa->start) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: STAT_OP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START); + case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (StackIndex )((void* )s); - STAT_OP_OUT; + mem_start_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: STAT_OP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END); + case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (StackIndex )((void* )s); - STAT_OP_OUT; + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; continue; break; #ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: STAT_OP_IN(OP_MEMORY_END_PUSH_REC); + case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (StackIndex )((void* )s); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); if (BIT_STATUS_AT(reg->bt_mem_start, mem)) mem_start_stk[mem] = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - STAT_OP_OUT; + MOP_OUT; continue; break; #endif - case OP_BACKREF1: STAT_OP_IN(OP_BACKREF1); + case OP_BACKREF1: MOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: STAT_OP_IN(OP_BACKREF2); + case OP_BACKREF2: MOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); + case OP_BACKREFN: MOP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); backref: { @@ -2342,15 +2159,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREFN_IC: STAT_OP_IN(OP_BACKREFN_IC); + case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); GET_MEMNUM_INC(mem, p); { int len; @@ -2373,16 +2190,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, n = pend - pstart; DATA_ENSURE(n); sprev = s; - STRING_CMP_IC(ambig_flag, pstart, &s, n); - while (sprev + (len = enc_len(encode, sprev)) < s) + STRING_CMP_IC(case_fold_flag, pstart, &s, n); + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREF_MULTI: STAT_OP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2409,19 +2226,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); break; /* success */ } if (i == tlen) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; - case OP_BACKREF_MULTI_IC: STAT_OP_IN(OP_BACKREF_MULTI_IC); + case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2445,23 +2262,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, DATA_ENSURE(n); sprev = s; swork = s; - STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail); + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); break; /* success */ } if (i == tlen) goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; -#ifdef USE_BACKREF_AT_LEVEL - case OP_BACKREF_AT_LEVEL: +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_WITH_LEVEL: { int len; OnigOptionType ic; @@ -2472,9 +2289,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, GET_LENGTH_INC(tlen, p); sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag - , (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enc_len(encode, sprev)) < s) + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * tlen); @@ -2482,35 +2299,37 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, else goto fail; - STAT_OP_OUT; + MOP_OUT; continue; } break; #endif - - case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); + +#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); + case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - STAT_OP_OUT; + MOP_OUT; continue; break; +#endif - case OP_NULL_CHECK_START: STAT_OP_IN(OP_NULL_CHECK_START); + case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_NULL_CHECK_START(mem, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_NULL_CHECK_END: STAT_OP_IN(OP_NULL_CHECK_END); + case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); { int isnull; @@ -2540,12 +2359,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } } } - STAT_OP_OUT; + MOP_OUT; continue; break; -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK - case OP_NULL_CHECK_END_MEMST: STAT_OP_IN(OP_NULL_CHECK_END_MEMST); +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; @@ -2560,19 +2379,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto null_check_found; } } - STAT_OP_OUT; + MOP_OUT; continue; break; #endif #ifdef USE_SUBEXP_CALL case OP_NULL_CHECK_END_MEMST_PUSH: - STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); { int isnull; GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); #else STACK_NULL_CHECK_REC(isnull, mem, s); @@ -2589,39 +2408,39 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STACK_PUSH_NULL_CHECK_END(mem); } } - STAT_OP_OUT; + MOP_OUT; continue; break; #endif - case OP_JUMP: STAT_OP_IN(OP_JUMP); + case OP_JUMP: MOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_PUSH: STAT_OP_IN(OP_PUSH); + case OP_PUSH: MOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; #ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH); + case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); GET_STATE_CHECK_NUM_INC(mem, p); STATE_CHECK_VAL(scv, mem); if (scv) goto fail; GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); GET_STATE_CHECK_NUM_INC(mem, p); GET_RELADDR_INC(addr, p); STATE_CHECK_VAL(scv, mem); @@ -2631,54 +2450,54 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, else { STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK); + case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); GET_STATE_CHECK_NUM_INC(mem, p); STATE_CHECK_VAL(scv, mem); if (scv) goto fail; STACK_PUSH_STATE_CHECK(s, mem); - STAT_OP_OUT; + MOP_OUT; continue; break; #endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_POP: STAT_OP_IN(OP_POP); + case OP_POP: MOP_IN(OP_POP); STACK_POP_ONE; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1); + case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); - if (*p == *s && DATA_ENSURE_CHECK(1)) { + if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; } p += (addr + 1); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; } p++; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT: STAT_OP_IN(OP_REPEAT); + case OP_REPEAT: MOP_IN(OP_REPEAT); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2691,11 +2510,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STACK_PUSH_ALT(p + addr, s, sprev); } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT_NG: STAT_OP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2709,11 +2528,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p += addr; } } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2731,19 +2550,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2765,68 +2584,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - STAT_OP_OUT; + MOP_OUT; CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); + case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); STACK_PUSH_POS(s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_POP_POS: STAT_OP_IN(OP_POP_POS); + case OP_POP_POS: MOP_IN(OP_POP_POS); { STACK_POS_END(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_POS_NOT: STAT_OP_IN(OP_PUSH_POS_NOT); + case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); GET_RELADDR_INC(addr, p); STACK_PUSH_POS_NOT(p + addr, s, sprev); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_FAIL_POS: STAT_OP_IN(OP_FAIL_POS); + case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); STACK_POP_TIL_POS_NOT; goto fail; break; - case OP_PUSH_STOP_BT: STAT_OP_IN(OP_PUSH_STOP_BT); + case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); STACK_PUSH_STOP_BT; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_POP_STOP_BT: STAT_OP_IN(OP_POP_STOP_BT); + case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); STACK_STOP_BT_END; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); + case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2841,28 +2660,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_FAIL_LOOK_BEHIND_NOT: STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT); + case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); STACK_POP_TIL_LOOK_BEHIND_NOT; goto fail; break; #ifdef USE_SUBEXP_CALL - case OP_CALL: STAT_OP_IN(OP_CALL); + case OP_CALL: MOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - STAT_OP_OUT; + MOP_OUT; continue; break; - case OP_RETURN: STAT_OP_IN(OP_RETURN); + case OP_RETURN: MOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - STAT_OP_OUT; + MOP_OUT; continue; break; #endif @@ -2872,9 +2691,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, break; fail: - STAT_OP_OUT; + MOP_OUT; /* fall */ - case OP_FAIL: STAT_OP_IN(OP_FAIL); + case OP_FAIL: MOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; @@ -2887,7 +2706,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, } #endif - STAT_OP_OUT; + MOP_OUT; continue; break; @@ -2943,32 +2762,25 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s += enc_len(enc, s); + s += enclen(enc, s); } return (UChar* )NULL; } static int -str_lower_case_match(OnigEncoding enc, int ambig_flag, +str_lower_case_match(OnigEncoding enc, int case_fold_flag, const UChar* t, const UChar* tend, const UChar* p, const UChar* end) { int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - const UChar* tsave; - const UChar* psave; - - tsave = t; - psave = p; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; while (t < tend) { - lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); + lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) { - return 0; - } + if (*t++ != *q++) return 0; lowlen--; } } @@ -2977,7 +2789,7 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, } static UChar* -slow_search_ic(OnigEncoding enc, int ambig_flag, +slow_search_ic(OnigEncoding enc, int case_fold_flag, UChar* target, UChar* target_end, const UChar* text, const UChar* text_end, UChar* text_range) { @@ -2991,10 +2803,11 @@ slow_search_ic(OnigEncoding enc, int ambig_flag, s = (UChar* )text; while (s < end) { - if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) return s; - s += enc_len(enc, s); + s += enclen(enc, s); } return (UChar* )NULL; @@ -3033,7 +2846,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, } static UChar* -slow_search_backward_ic(OnigEncoding enc, int ambig_flag, +slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, UChar* target, UChar* target_end, const UChar* text, const UChar* adjust_text, const UChar* text_end, const UChar* text_start) @@ -3048,7 +2861,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag, s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { - if (str_lower_case_match(enc, ambig_flag, + if (str_lower_case_match(enc, case_fold_flag, target, target_end, s, text_end)) return s; @@ -3084,15 +2897,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = se = s + tlen1; t = tail; - while (t >= target && *p == *t) { - p--; t--; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } - if (t < target) return (UChar* )s; - skip = reg->map[*se]; t = s; do { - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } while ((s - t) < skip && s < end); } } @@ -3100,15 +2912,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = se = s + tlen1; t = tail; - while (t >= target && *p == *t) { - p--; t--; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } - if (t < target) return (UChar* )s; - skip = reg->int_map[*se]; t = s; do { - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } while ((s - t) < skip && s < end); } } @@ -3133,10 +2944,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s; t = tail; - while (t >= target && *p == *t) { + while (*p == *t) { + if (t == target) return (UChar* )p; p--; t--; } - if (t < target) return (UChar* )(p + 1); s += reg->map[*s]; } } @@ -3144,10 +2955,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, while (s < end) { p = s; t = tail; - while (t >= target && *p == *t) { + while (*p == *t) { + if (t == target) return (UChar* )p; p--; t--; } - if (t < target) return (UChar* )(p + 1); s += reg->int_map[*s]; } } @@ -3155,7 +2966,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, } static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + int** skip) { int i, len; @@ -3213,7 +3025,7 @@ map_search(OnigEncoding enc, UChar map[], while (s < text_range) { if (map[*s]) return (UChar* )s; - s += enc_len(enc, s); + s += enclen(enc, s); } return (UChar* )NULL; } @@ -3239,7 +3051,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On { int r; UChar *prev; - MatchArg msa; + OnigMatchArg msa; #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) start: @@ -3275,7 +3087,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On #endif if (region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3286,7 +3098,11 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On if (r == 0) { prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, at, prev, &msa); + r = match_at(reg, str, end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + end, +#endif + at, prev, &msa); } MATCH_ARG_FREE(msa); @@ -3312,7 +3128,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; - while (p < q) p += enc_len(reg->enc, p); + while (p < q) p += enclen(reg->enc, p); } } @@ -3322,7 +3138,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->ambig_flag, + p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; @@ -3343,7 +3159,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (p - reg->dmin < s) { retry_gate: pprev = p; - p += enc_len(reg->enc, p); + p += enclen(reg->enc, p); goto retry; } @@ -3362,10 +3178,12 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, case ANCHOR_END_LINE: if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = (UChar* )onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; +#endif } else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -3443,7 +3261,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, break; case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->ambig_flag, + p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; @@ -3484,12 +3302,14 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ANCHOR_END_LINE: if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(prev)) goto fail; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } +#endif } else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -3532,8 +3352,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, { int r; UChar *s, *prev; - MatchArg msa; + OnigMatchArg msa; const UChar *orig_start = start; +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar *orig_range = range; +#endif #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) start: @@ -3567,7 +3390,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #endif if (region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3577,8 +3400,32 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; + +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#else #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK \ +#define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -3589,7 +3436,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #else -#define MATCH_AND_RETURN_CHECK \ +#define MATCH_AND_RETURN_CHECK(none) \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ @@ -3597,7 +3444,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, }\ else goto finish; /* error */ \ } -#endif +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + /* anchor optimize: resume search range */ if (reg->anchor != 0 && str < end) { @@ -3700,10 +3549,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, MATCH_ARG_INIT(msa, option, region, start); #ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ #endif - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(end); goto mismatch; } goto mismatch_no_msa; @@ -3754,9 +3603,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, prev = low_prev; } while (s <= high) { - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } } while (s < range); goto mismatch; @@ -3767,13 +3616,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { do { - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } } while (s < range); goto mismatch; @@ -3782,16 +3631,21 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } do { - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); } while (s < range); if (s == range) { /* because empty match with /$/. */ - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_range); } } else { /* backward search */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + if (orig_start < end) + orig_start += enclen(reg->enc, orig_start); /* is upper range */ +#endif + if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; @@ -3814,7 +3668,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, while (s >= low) { prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_start); s = prev; } } while (s >= range); @@ -3842,7 +3696,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, do { prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK; + MATCH_AND_RETURN_CHECK(orig_start); s = prev; } while (s >= range); } @@ -3865,7 +3719,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ if (IS_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3906,10 +3760,10 @@ onig_get_options(regex_t* reg) return reg->options; } -extern OnigAmbigType -onig_get_ambig_flag(regex_t* reg) +extern OnigCaseFoldType +onig_get_case_fold_flag(regex_t* reg) { - return reg->ambig_flag; + return reg->case_fold_flag; } extern OnigSyntaxType* |