diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regcomp.c')
-rw-r--r-- | ext/mbstring/oniguruma/regcomp.c | 2070 |
1 files changed, 1140 insertions, 930 deletions
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index 6a0976dee2..f9d99563b3 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,26 +29,28 @@ #include "regparse.h" -OnigAmbigType OnigDefaultAmbigFlag = - (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE); +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; -extern OnigAmbigType -onig_get_default_ambig_flag(void) +extern OnigCaseFoldType +onig_get_default_case_fold_flag(void) { - return OnigDefaultAmbigFlag; + return OnigDefaultCaseFoldFlag; } extern int -onig_set_default_ambig_flag(OnigAmbigType ambig_flag) +onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) { - OnigDefaultAmbigFlag = ambig_flag; + OnigDefaultCaseFoldFlag = case_fold_flag; return 0; } +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + static UChar* -k_strdup(UChar* s, UChar* end) +str_dup(UChar* s, UChar* end) { int len = end - s; @@ -62,15 +64,29 @@ k_strdup(UChar* s, UChar* end) else return NULL; } -/* - Caution: node should not be a string node. - (s and end member address break) -*/ static void swap_node(Node* a, Node* b) { Node c; c = *a; *a = *b; *b = c; + + if (NTYPE(a) == NT_STR) { + StrNode* sn = NSTR(a); + if (sn->capa == 0) { + int len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } + + if (NTYPE(b) == NT_STR) { + StrNode* sn = NSTR(b); + if (sn->capa == 0) { + int len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } } static OnigDistance @@ -99,7 +115,7 @@ static int bitset_is_empty(BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { + for (i = 0; i < (int )BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } return 1; @@ -122,8 +138,14 @@ bitset_on_num(BitSetRef bs) extern int onig_bbuf_init(BBuf* buf, int size) { - buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + if (size <= 0) { + size = 0; + buf->p = NULL; + } + else { + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + } buf->alloc = size; buf->used = 0; @@ -139,7 +161,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size) UnsetAddr* p; p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); uslist->num = 0; uslist->alloc = size; uslist->us = p; @@ -162,7 +184,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) if (uslist->num >= uslist->alloc) { size = uslist->alloc * 2; p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); uslist->alloc = size; uslist->us = p; } @@ -394,8 +416,8 @@ compile_tree_n_times(Node* node, int n, regex_t* reg) } static int -add_compile_string_length(UChar* s, int mb_len, int str_len, - regex_t* reg, int ignore_case) +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, + regex_t* reg ARG_UNUSED, int ignore_case) { int len; int op = select_str_opcode(mb_len, str_len, ignore_case); @@ -440,20 +462,20 @@ compile_length_string_node(Node* node, regex_t* reg) UChar *p, *prev; StrNode* sn; - sn = &(NSTRING(node)); + sn = NSTR(node); if (sn->end <= sn->s) return 0; ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p); + prev_len = enclen(enc, p); p += prev_len; slen = 1; rlen = 0; for (; p < sn->end; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (len == prev_len) { slen++; } @@ -488,7 +510,7 @@ compile_string_node(Node* node, regex_t* reg) UChar *p, *prev, *end; StrNode* sn; - sn = &(NSTRING(node)); + sn = NSTR(node); if (sn->end <= sn->s) return 0; @@ -496,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg) ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p); + prev_len = enclen(enc, p); p += prev_len; slen = 1; for (; p < end; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (len == prev_len) { slen++; } @@ -535,8 +557,6 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) add_length(reg, mbuf->used); return add_bytes(reg, mbuf->p, mbuf->used); #else - static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; - int r, pad_size; UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; @@ -558,7 +578,7 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg) { int len; - if (IS_CCLASS_SHARE(cc)) { + if (IS_NCCLASS_SHARE(cc)) { len = SIZE_OPCODE + SIZE_POINTER; return len; } @@ -588,14 +608,14 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) { int r; - if (IS_CCLASS_SHARE(cc)) { + if (IS_NCCLASS_SHARE(cc)) { add_opcode(reg, OP_CCLASS_NODE); r = add_pointer(reg, cc); return r; } if (IS_NULL(cc->mbuf)) { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_NOT); else add_opcode(reg, OP_CCLASS); @@ -604,7 +624,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) } else { if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MB_NOT); else add_opcode(reg, OP_CCLASS_MB); @@ -612,7 +632,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) r = add_multi_byte_cclass(cc->mbuf, reg); } else { - if (IS_CCLASS_NOT(cc)) + if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_MIX_NOT); else add_opcode(reg, OP_CCLASS_MIX); @@ -635,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) if (reg->repeat_range_alloc == 0) { p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); reg->repeat_range = p; reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; } @@ -644,7 +664,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n); - CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(p); reg->repeat_range = p; reg->repeat_range_alloc = n; } @@ -658,7 +678,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } static int -compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, regex_t* reg) { int r; @@ -694,10 +714,10 @@ compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, } static int -is_anychar_star_quantifier(QuantifierNode* qn) +is_anychar_star_quantifier(QtfrNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == N_ANYCHAR) + NTYPE(qn->target) == NT_CANY) return 1; else return 0; @@ -709,7 +729,7 @@ is_anychar_star_quantifier(QuantifierNode* qn) #ifdef USE_COMBINATION_EXPLOSION_CHECK static int -compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) { int len, mod_tlen, cklen; int ckn; @@ -724,7 +744,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); /* anychar repeat */ - if (NTYPE(qn->target) == N_ANYCHAR) { + if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; @@ -789,7 +809,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) } static int -compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_quantifier_node(QtfrNode* qn, regex_t* reg) { int r, mod_tlen; int ckn; @@ -815,7 +835,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) if (r) return r; } - return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) { @@ -943,7 +963,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) #else /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -953,7 +973,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) if (tlen < 0) return tlen; /* anychar repeat */ - if (NTYPE(qn->target) == N_ANYCHAR) { + if (NTYPE(qn->target) == NT_CANY) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; @@ -1008,7 +1028,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) } static int -compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +compile_quantifier_node(QtfrNode* qn, regex_t* reg) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -1026,7 +1046,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); if (r) return r; - return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) @@ -1067,7 +1087,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, mod_tlen + SIZE_OP_JUMP); if (r) return r; - add_bytes(reg, NSTRING(qn->head_exact).s, 1); + add_bytes(reg, NSTR(qn->head_exact)->s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, @@ -1077,7 +1097,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, mod_tlen + SIZE_OP_JUMP); if (r) return r; - add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); r = compile_tree_empty_check(qn->target, reg, empty_info); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, @@ -1136,7 +1156,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_option_node(EffectNode* node, regex_t* reg) +compile_length_option_node(EncloseNode* node, regex_t* reg) { int tlen; OnigOptionType prev = reg->options; @@ -1156,7 +1176,7 @@ compile_length_option_node(EffectNode* node, regex_t* reg) } static int -compile_option_node(EffectNode* node, regex_t* reg) +compile_option_node(EncloseNode* node, regex_t* reg) { int r; OnigOptionType prev = reg->options; @@ -1182,12 +1202,12 @@ compile_option_node(EffectNode* node, regex_t* reg) } static int -compile_length_effect_node(EffectNode* node, regex_t* reg) +compile_length_enclose_node(EncloseNode* node, regex_t* reg) { int len; int tlen; - if (node->type == EFFECT_OPTION) + if (node->type == ENCLOSE_OPTION) return compile_length_option_node(node, reg); if (node->target) { @@ -1198,16 +1218,16 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) tlen = 0; switch (node->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } else @@ -1223,9 +1243,9 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) } break; - case EFFECT_STOP_BACKTRACK: - if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QuantifierNode* qn = &NQUANTIFIER(node->target); + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -1248,17 +1268,17 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) static int get_char_length_tree(Node* node, regex_t* reg, int* len); static int -compile_effect_node(EffectNode* node, regex_t* reg) +compile_enclose_node(EncloseNode* node, regex_t* reg) { int r, len; - if (node->type == EFFECT_OPTION) + if (node->type == ENCLOSE_OPTION) return compile_option_node(node, reg); switch (node->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r) return r; node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; @@ -1268,10 +1288,10 @@ compile_effect_node(EffectNode* node, regex_t* reg) len = compile_length_tree(node->target, reg); len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_EFFECT_RECURSION(node) + len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); r = add_opcode_rel_addr(reg, OP_JUMP, len); @@ -1288,12 +1308,12 @@ compile_effect_node(EffectNode* node, regex_t* reg) r = compile_tree(node->target, reg); if (r) return r; #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CALLED(node)) { + if (IS_ENCLOSE_CALLED(node)) { if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); else - r = add_opcode(reg, (IS_EFFECT_RECURSION(node) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END)); if (r) return r; @@ -1313,9 +1333,9 @@ compile_effect_node(EffectNode* node, regex_t* reg) } break; - case EFFECT_STOP_BACKTRACK: - if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QuantifierNode* qn = &NQUANTIFIER(node->target); + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; @@ -1472,50 +1492,50 @@ compile_length_tree(Node* node, regex_t* reg) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: len = 0; do { - r = compile_length_tree(NCONS(node).left, reg); + r = compile_length_tree(NCAR(node), reg); if (r < 0) return r; len += r; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); r = len; break; - case N_ALT: + case NT_ALT: { int n; n = r = 0; do { - r += compile_length_tree(NCONS(node).left, reg); + r += compile_length_tree(NCAR(node), reg); n++; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); } break; - case N_STRING: + case NT_STR: if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(&(NSTRING(node)), reg); + r = compile_length_string_raw_node(NSTR(node), reg); else r = compile_length_string_node(node, reg); break; - case N_CCLASS: - r = compile_length_cclass_node(&(NCCLASS(node)), reg); + case NT_CCLASS: + r = compile_length_cclass_node(NCCLASS(node), reg); break; - case N_CTYPE: - case N_ANYCHAR: + case NT_CTYPE: + case NT_CANY: r = SIZE_OPCODE; break; - case N_BACKREF: + case NT_BREF: { - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); @@ -1533,21 +1553,21 @@ compile_length_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: r = SIZE_OP_CALL; break; #endif - case N_QUANTIFIER: - r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg); + case NT_QTFR: + r = compile_length_quantifier_node(NQTFR(node), reg); break; - case N_EFFECT: - r = compile_length_effect_node(&NEFFECT(node), reg); + case NT_ENCLOSE: + r = compile_length_enclose_node(NENCLOSE(node), reg); break; - case N_ANCHOR: - r = compile_length_anchor_node(&(NANCHOR(node)), reg); + case NT_ANCHOR: + r = compile_length_anchor_node(NANCHOR(node), reg); break; default: @@ -1565,59 +1585,61 @@ compile_tree(Node* node, regex_t* reg) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: do { - r = compile_tree(NCONS(node).left, reg); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = compile_tree(NCAR(node), reg); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { Node* x = node; len = 0; do { - len += compile_length_tree(NCONS(x).left, reg); - if (NCONS(x).right != NULL) { + len += compile_length_tree(NCAR(x), reg); + if (NCDR(x) != NULL) { len += SIZE_OP_PUSH + SIZE_OP_JUMP; } - } while (IS_NOT_NULL(x = NCONS(x).right)); + } while (IS_NOT_NULL(x = NCDR(x))); pos = reg->used + len; /* goal position */ do { - len = compile_length_tree(NCONS(node).left, reg); - if (IS_NOT_NULL(NCONS(node).right)) { + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); if (r) break; } - r = compile_tree(NCONS(node).left, reg); + r = compile_tree(NCAR(node), reg); if (r) break; - if (IS_NOT_NULL(NCONS(node).right)) { + if (IS_NOT_NULL(NCDR(node))) { len = pos - (reg->used + SIZE_OP_JUMP); r = add_opcode_rel_addr(reg, OP_JUMP, len); if (r) break; } - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_STRING: + case NT_STR: if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(&(NSTRING(node)), reg); + r = compile_string_raw_node(NSTR(node), reg); else r = compile_string_node(node, reg); break; - case N_CCLASS: - r = compile_cclass_node(&(NCCLASS(node)), reg); + case NT_CCLASS: + r = compile_cclass_node(NCCLASS(node), reg); break; - case N_CTYPE: + case NT_CTYPE: { int op; - switch (NCTYPE(node).type) { - case CTYPE_WORD: op = OP_WORD; break; - case CTYPE_NOT_WORD: op = OP_NOT_WORD; break; + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; + break; default: return ONIGERR_TYPE_BUG; break; @@ -1626,20 +1648,20 @@ compile_tree(Node* node, regex_t* reg) } break; - case N_ANYCHAR: + case NT_CANY: if (IS_MULTILINE(reg->options)) r = add_opcode(reg, OP_ANYCHAR_ML); else r = add_opcode(reg, OP_ANYCHAR); break; - case N_BACKREF: + case NT_BREF: { - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); if (r) return r; r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); if (r) return r; @@ -1681,7 +1703,7 @@ compile_tree(Node* node, regex_t* reg) } if (r) return r; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL add_bacref_mems: #endif r = add_length(reg, br->back_num); @@ -1696,21 +1718,21 @@ compile_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - r = compile_call(&(NCALL(node)), reg); + case NT_CALL: + r = compile_call(NCALL(node), reg); break; #endif - case N_QUANTIFIER: - r = compile_quantifier_node(&(NQUANTIFIER(node)), reg); + case NT_QTFR: + r = compile_quantifier_node(NQTFR(node), reg); break; - case N_EFFECT: - r = compile_effect_node(&NEFFECT(node), reg); + case NT_ENCLOSE: + r = compile_enclose_node(NENCLOSE(node), reg); break; - case N_ANCHOR: - r = compile_anchor_node(&(NANCHOR(node)), reg); + case NT_ANCHOR: + r = compile_anchor_node(NANCHOR(node), reg); break; default: @@ -1732,29 +1754,29 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) Node* node = *plink; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = noname_disable_map(&(NCONS(node).left), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = noname_disable_map(&(NCAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: + case NT_QTFR: { - Node** ptarget = &(NQUANTIFIER(node).target); + Node** ptarget = &(NQTFR(node)->target); Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) { + if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { onig_reduce_nested_quantifier(node, *ptarget); } } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); - if (en->type == EFFECT_MEMORY) { - if (IS_EFFECT_NAMED_GROUP(en)) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + if (IS_ENCLOSE_NAMED_GROUP(en)) { (*counter)++; map[en->regnum].new_val = *counter; en->regnum = *counter; @@ -1784,7 +1806,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; - BackrefNode* bn = &(NBACKREF(node)); + BRefNode* bn = NBREF(node); if (! IS_BACKREF_NAME_REF(bn)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; @@ -1813,20 +1835,20 @@ renumber_by_map(Node* node, GroupNumRemap* map) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = renumber_by_map(NCONS(node).left, map); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = renumber_by_map(NCAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = renumber_by_map(NQUANTIFIER(node).target, map); + case NT_QTFR: + r = renumber_by_map(NQTFR(node)->target, map); break; - case N_EFFECT: - r = renumber_by_map(NEFFECT(node).target, map); + case NT_ENCLOSE: + r = renumber_by_map(NENCLOSE(node)->target, map); break; - case N_BACKREF: + case NT_BREF: r = renumber_node_backref(node, map); break; @@ -1843,21 +1865,21 @@ numbered_ref_check(Node* node) int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = numbered_ref_check(NCONS(node).left); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = numbered_ref_check(NCAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = numbered_ref_check(NQUANTIFIER(node).target); + case NT_QTFR: + r = numbered_ref_check(NQTFR(node)->target); break; - case N_EFFECT: - r = numbered_ref_check(NEFFECT(node).target); + case NT_ENCLOSE: + r = numbered_ref_check(NENCLOSE(node)->target); break; - case N_BACKREF: - if (! IS_BACKREF_NAME_REF(&(NBACKREF(node)))) + case NT_BREF: + if (! IS_BACKREF_NAME_REF(NBREF(node))) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; @@ -1876,7 +1898,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) GroupNumRemap* map; map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); - CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(map); for (i = 1; i <= env->num_mem; i++) { map[i].new_val = 0; } @@ -1914,12 +1936,12 @@ static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) { int i, offset; - EffectNode* en; + EncloseNode* en; AbsAddrType addr; for (i = 0; i < uslist->num; i++) { - en = &(NEFFECT(uslist->us[i].target)); - if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + en = NENCLOSE(uslist->us[i].target); + if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; addr = en->call_addr; offset = uslist->us[i].offset; @@ -1929,53 +1951,53 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) } #endif -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT static int quantifiers_memory_node_info(Node* node) { int r = 0; switch (NTYPE(node)) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: { int v; do { - v = quantifiers_memory_node_info(NCONS(node).left); + v = quantifiers_memory_node_info(NCAR(node)); if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&NCALL(node))) { + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ } else - r = quantifiers_memory_node_info(NCALL(node).target); + r = quantifiers_memory_node_info(NCALL(node)->target); break; #endif - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { r = quantifiers_memory_node_info(qn->target); } } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: return NQ_TARGET_IS_EMPTY_MEM; break; - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = quantifiers_memory_node_info(en->target); break; default: @@ -1984,19 +2006,19 @@ quantifiers_memory_node_info(Node* node) } break; - case N_BACKREF: - case N_STRING: - case N_CTYPE: - case N_CCLASS: - case N_ANYCHAR: - case N_ANCHOR: + case NT_BREF: + case NT_STR: + case NT_CTYPE: + case NT_CCLASS: + case NT_CANY: + case NT_ANCHOR: default: break; } return r; } -#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */ +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ static int get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) @@ -2006,12 +2028,12 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) *min = 0; switch (NTYPE(node)) { - case N_BACKREF: + case NT_BREF: { int i; int* backs; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) break; backs = BACKREFS_P(br); @@ -2028,62 +2050,57 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&NCALL(node))) { - EffectNode* en = &(NEFFECT(NCALL(node).target)); - if (IS_EFFECT_MIN_FIXED(en)) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + EncloseNode* en = NENCLOSE(NCALL(node)->target); + if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; } else - r = get_min_match_length(NCALL(node).target, min, env); + r = get_min_match_length(NCALL(node)->target, min, env); break; #endif - case N_LIST: + case NT_LIST: do { - r = get_min_match_length(NCONS(node).left, &tmin, env); + r = get_min_match_length(NCAR(node), &tmin, env); if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { Node *x, *y; y = node; do { - x = NCONS(y).left; + x = NCAR(y); r = get_min_match_length(x, &tmin, env); if (r != 0) break; if (y == node) *min = tmin; else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right)); + } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); *min = sn->end - sn->s; } break; - case N_CTYPE: - switch (NCTYPE(node).type) { - case CTYPE_WORD: *min = 1; break; - case CTYPE_NOT_WORD: *min = 1; break; - default: - break; - } + case NT_CTYPE: + *min = 1; break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *min = 1; break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { r = get_min_match_length(qn->target, min, env); @@ -2093,32 +2110,32 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_MIN_FIXED(en)) + if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; else { r = get_min_match_length(en->target, min, env); if (r == 0) { en->min_len = *min; - SET_EFFECT_STATUS(node, NST_MIN_FIXED); + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_min_match_length(en->target, min, env); break; } } break; - case N_ANCHOR: + case NT_ANCHOR: default: break; } @@ -2134,51 +2151,43 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) *max = 0; switch (NTYPE(node)) { - case N_LIST: + case NT_LIST: do { - r = get_max_match_length(NCONS(node).left, &tmax, env); + r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0) *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: do { - r = get_max_match_length(NCONS(node).left, &tmax, env); + r = get_max_match_length(NCAR(node), &tmax, env); if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); *max = sn->end - sn->s; } break; - case N_CTYPE: - switch (NCTYPE(node).type) { - case CTYPE_WORD: - case CTYPE_NOT_WORD: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - default: - break; - } + case NT_CTYPE: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case N_BACKREF: + case NT_BREF: { int i; int* backs; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { *max = ONIG_INFINITE_DISTANCE; break; @@ -2194,17 +2203,17 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (! IS_CALL_RECURSION(&(NCALL(node)))) - r = get_max_match_length(NCALL(node).target, max, env); + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_max_match_length(NCALL(node)->target, max, env); else *max = ONIG_INFINITE_DISTANCE; break; #endif - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->upper != 0) { r = get_max_match_length(qn->target, max, env); @@ -2218,32 +2227,32 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_MAX_FIXED(en)) + if (IS_ENCLOSE_MAX_FIXED(en)) *max = en->max_len; else { r = get_max_match_length(en->target, max, env); if (r == 0) { en->max_len = *max; - SET_EFFECT_STATUS(node, NST_MAX_FIXED); + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_max_match_length(en->target, max, env); break; } } break; - case N_ANCHOR: + case NT_ANCHOR: default: break; } @@ -2264,22 +2273,22 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) level++; *len = 0; switch (NTYPE(node)) { - case N_LIST: + case NT_LIST: do { - r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); if (r == 0) *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: { int tlen2; int varlen = 0; - r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) { - r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level); + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); if (r == 0) { if (tlen != tlen2) varlen = 1; @@ -2298,20 +2307,20 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); UChar *s = sn->s; while (s < sn->end) { - s += enc_len(reg->enc, s); + s += enclen(reg->enc, s); (*len)++; } } break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower == qn->upper) { r = get_char_length_tree1(qn->target, reg, &tlen, level); if (r == 0) @@ -2323,47 +2332,42 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (! IS_CALL_RECURSION(&(NCALL(node)))) - r = get_char_length_tree1(NCALL(node).target, reg, len, level); + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_char_length_tree1(NCALL(node)->target, reg, len, level); else r = GET_CHAR_LEN_VARLEN; break; #endif - case N_CTYPE: - switch (NCTYPE(node).type) { - case CTYPE_WORD: - case CTYPE_NOT_WORD: - *len = 1; - break; - } + case NT_CTYPE: + *len = 1; break; - case N_CCLASS: - case N_ANYCHAR: + case NT_CCLASS: + case NT_CANY: *len = 1; break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_EFFECT_CLEN_FIXED(en)) + if (IS_ENCLOSE_CLEN_FIXED(en)) *len = en->char_len; else { r = get_char_length_tree1(en->target, reg, len, level); if (r == 0) { en->char_len = *len; - SET_EFFECT_STATUS(node, NST_CLEN_FIXED); + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); } } break; #endif - case EFFECT_OPTION: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: r = get_char_length_tree1(en->target, reg, len, level); break; default: @@ -2372,7 +2376,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_ANCHOR: + case NT_ANCHOR: break; default: @@ -2401,29 +2405,18 @@ is_not_included(Node* x, Node* y, regex_t* reg) retry: ytype = NTYPE(y); switch (NTYPE(x)) { - case N_CTYPE: + case NT_CTYPE: { switch (ytype) { - case N_CTYPE: - switch (NCTYPE(x).type) { - case CTYPE_WORD: - if (NCTYPE(y).type == CTYPE_NOT_WORD) - return 1; - else - return 0; - break; - case CTYPE_NOT_WORD: - if (NCTYPE(y).type == CTYPE_WORD) - return 1; - else - return 0; - break; - default: - break; - } + case NT_CTYPE: + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not) + return 1; + else + return 0; break; - case N_CCLASS: + case NT_CCLASS: swap: { Node* tmp; @@ -2432,7 +2425,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: goto swap; break; @@ -2442,37 +2435,39 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* xc = &(NCCLASS(x)); + CClassNode* xc = NCCLASS(x); switch (ytype) { - case N_CTYPE: - switch (NCTYPE(y).type) { - case CTYPE_WORD: - if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(xc->bs, i)) { - if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0; + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (IS_CODE_SB_WORD(reg->enc, i)) return 0; + } } + return 1; } - return 1; + return 0; } - return 0; - break; - case CTYPE_NOT_WORD: - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) { - if (!IS_CCLASS_NOT(xc)) { - if (BITSET_AT(xc->bs, i)) - return 0; - } - else { - if (! BITSET_AT(xc->bs, i)) - return 0; + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_NCCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! BITSET_AT(xc->bs, i)) + return 0; + } } } + return 1; } - return 1; break; default: @@ -2480,29 +2475,29 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_CCLASS: + case NT_CCLASS: { int v; - CClassNode* yc = &(NCCLASS(y)); + CClassNode* yc = NCCLASS(y); for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_CCLASS_NOT(xc)) || - (v == 0 && IS_CCLASS_NOT(xc))) { + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_CCLASS_NOT(yc)) || - (v == 0 && IS_CCLASS_NOT(yc))) + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) return 0; } } - if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc))) + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) return 1; return 0; } break; - case N_STRING: + case NT_STR: goto swap; break; @@ -2512,30 +2507,30 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: { - StrNode* xs = &(NSTRING(x)); + StrNode* xs = NSTR(x); if (NSTRING_LEN(x) == 0) break; c = *(xs->s); switch (ytype) { - case N_CTYPE: - switch (NCTYPE(y).type) { - case CTYPE_WORD: - return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1); - break; - case CTYPE_NOT_WORD: - return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0); + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return !(NCTYPE(y)->not); break; default: break; } break; - case N_CCLASS: + case NT_CCLASS: { - CClassNode* cc = &(NCCLASS(y)); + CClassNode* cc = NCCLASS(y); code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); @@ -2543,10 +2538,10 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case N_STRING: + case NT_STR: { UChar *q; - StrNode* ys = &(NSTRING(y)); + StrNode* ys = NSTR(y); len = NSTRING_LEN(x); if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { @@ -2580,40 +2575,34 @@ get_head_value_node(Node* node, int exact, regex_t* reg) Node* n = NULL_NODE; switch (NTYPE(node)) { - case N_BACKREF: - case N_ALT: - case N_ANYCHAR: + case NT_BREF: + case NT_ALT: + case NT_CANY: #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: #endif break; - case N_CTYPE: - case N_CCLASS: + case NT_CTYPE: + case NT_CCLASS: if (exact == 0) { n = node; } break; - case N_LIST: - n = get_head_value_node(NCONS(node).left, exact, reg); + case NT_LIST: + n = get_head_value_node(NCAR(node), exact, reg); break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); if (sn->end <= sn->s) break; if (exact != 0 && !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { -#if 0 - UChar* tmp = sn->s; - if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, - &tmp, sn->end)) - n = node; -#endif } else { n = node; @@ -2621,9 +2610,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_QUANTIFIER: + case NT_QTFR: { - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); if (qn->lower > 0) { if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; @@ -2633,31 +2622,31 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType options = reg->options; - reg->options = NEFFECT(node).option; - n = get_head_value_node(NEFFECT(node).target, exact, reg); + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); reg->options = options; } break; - case EFFECT_MEMORY: - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_MEMORY: + case ENCLOSE_STOP_BACKTRACK: n = get_head_value_node(en->target, exact, reg); break; } } break; - case N_ANCHOR: - if (NANCHOR(node).type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node).target, exact, reg); + case NT_ANCHOR: + if (NANCHOR(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node)->target, exact, reg); break; default: @@ -2668,45 +2657,46 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } static int -check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) { int type, r = 0; type = NTYPE(node); - if ((type & type_mask) == 0) + if ((NTYPE2BIT(type) & type_mask) == 0) return 1; switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = check_type_tree(NCAR(node), type_mask, enclose_mask, + anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask, + case NT_QTFR: + r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, anchor_mask); break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); - if ((en->type & effect_mask) == 0) + EncloseNode* en = NENCLOSE(node); + if ((en->type & enclose_mask) == 0) return 1; - r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask); + r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); } break; - case N_ANCHOR: - type = NANCHOR(node).type; + case NT_ANCHOR: + type = NANCHOR(node)->type; if ((type & anchor_mask) == 0) return 1; - if (NANCHOR(node).target) - r = check_type_tree(NANCHOR(node).target, - type_mask, effect_mask, anchor_mask); + if (NANCHOR(node)->target) + r = check_type_tree(NANCHOR(node)->target, + type_mask, enclose_mask, anchor_mask); break; default: @@ -2728,7 +2718,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node *x; OnigDistance min; @@ -2736,40 +2726,40 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) x = node; do { - ret = subexp_inf_recursive_check(NCONS(x).left, env, head); + ret = subexp_inf_recursive_check(NCAR(x), env, head); if (ret < 0 || ret == RECURSION_INFINITE) return ret; r |= ret; if (head) { - ret = get_min_match_length(NCONS(x).left, &min, env); + ret = get_min_match_length(NCAR(x), &min, env); if (ret != 0) return ret; if (min != 0) head = 0; } - } while (IS_NOT_NULL(x = NCONS(x).right)); + } while (IS_NOT_NULL(x = NCDR(x))); } break; - case N_ALT: + case NT_ALT: { int ret; r = RECURSION_EXIST; do { - ret = subexp_inf_recursive_check(NCONS(node).left, env, head); + ret = subexp_inf_recursive_check(NCAR(node), env, head); if (ret < 0 || ret == RECURSION_INFINITE) return ret; r &= ret; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: - r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head); + case NT_QTFR: + r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); if (r == RECURSION_EXIST) { - if (NQUANTIFIER(node).lower == 0) r = 0; + if (NQTFR(node)->lower == 0) r = 0; } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2781,19 +2771,19 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) } break; - case N_CALL: - r = subexp_inf_recursive_check(NCALL(node).target, env, head); + case NT_CALL: + r = subexp_inf_recursive_check(NCALL(node)->target, env, head); break; - case N_EFFECT: - if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) return 0; - else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); else { - SET_EFFECT_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NEFFECT(node).target, env, head); - CLEAR_EFFECT_STATUS(node, NST_MARK2); + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); } break; @@ -2812,20 +2802,20 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: do { - r = subexp_inf_recursive_check_trav(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = subexp_inf_recursive_check_trav(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env); + case NT_QTFR: + r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2837,15 +2827,15 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); - if (IS_EFFECT_RECURSION(en)) { - SET_EFFECT_STATUS(node, NST_MARK1); + if (IS_ENCLOSE_RECURSION(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); r = subexp_inf_recursive_check(en->target, env, 1); if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_EFFECT_STATUS(node, NST_MARK1); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } r = subexp_inf_recursive_check_trav(en->target, env); } @@ -2862,25 +2852,23 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) static int subexp_recursive_check(Node* node) { - int type; int r = 0; - type = NTYPE(node); - switch (type) { - case N_LIST: - case N_ALT: + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: do { - r |= subexp_recursive_check(NCONS(node).left); - } while (IS_NOT_NULL(node = NCONS(node).right)); + r |= subexp_recursive_check(NCAR(node)); + } while (IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = subexp_recursive_check(NQUANTIFIER(node).target); + case NT_QTFR: + r = subexp_recursive_check(NQTFR(node)->target); break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2892,20 +2880,20 @@ subexp_recursive_check(Node* node) } break; - case N_CALL: - r = subexp_recursive_check(NCALL(node).target); + case NT_CALL: + r = subexp_recursive_check(NCALL(node)->target); if (r != 0) SET_CALL_RECURSION(node); break; - case N_EFFECT: - if (IS_EFFECT_MARK2(&(NEFFECT(node)))) + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) return 0; - else if (IS_EFFECT_MARK1(&(NEFFECT(node)))) + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) return 1; /* recursion */ else { - SET_EFFECT_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NEFFECT(node).target); - CLEAR_EFFECT_STATUS(node, NST_MARK2); + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NENCLOSE(node)->target); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); } break; @@ -2927,29 +2915,29 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: + case NT_LIST: + case NT_ALT: { int ret; do { - ret = subexp_recursive_check_trav(NCONS(node).left, env); + ret = subexp_recursive_check_trav(NCAR(node), env); if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCONS(node).right)); + } while (IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: - r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env); - if (NQUANTIFIER(node).upper == 0) { + case NT_QTFR: + r = subexp_recursive_check_trav(NQTFR(node)->target, env); + if (NQTFR(node)->upper == 0) { if (r == FOUND_CALLED_NODE) - NQUANTIFIER(node).is_refered = 1; + NQTFR(node)->is_refered = 1; } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: case ANCHOR_PREC_READ_NOT: @@ -2961,20 +2949,20 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); - if (! IS_EFFECT_RECURSION(en)) { - if (IS_EFFECT_CALLED(en)) { - SET_EFFECT_STATUS(node, NST_MARK1); + if (! IS_ENCLOSE_RECURSION(en)) { + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); r = subexp_recursive_check(en->target); - if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION); - CLEAR_EFFECT_STATUS(node, NST_MARK1); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); } } r = subexp_recursive_check_trav(en->target, env); - if (IS_EFFECT_CALLED(en)) + if (IS_ENCLOSE_CALLED(en)) r |= FOUND_CALLED_NODE; } break; @@ -2994,46 +2982,33 @@ setup_subexp_call(Node* node, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: do { - r = setup_subexp_call(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_ALT: + case NT_ALT: do { - r = setup_subexp_call(NCONS(node).left, env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_QUANTIFIER: - r = setup_subexp_call(NQUANTIFIER(node).target, env); + case NT_QTFR: + r = setup_subexp_call(NQTFR(node)->target, env); break; - case N_EFFECT: - r = setup_subexp_call(NEFFECT(node).target, env); + case NT_ENCLOSE: + r = setup_subexp_call(NENCLOSE(node)->target, env); break; - case N_CALL: + case NT_CALL: { - int n, num, *refs; - UChar *p; - CallNode* cn = &(NCALL(node)); + CallNode* cn = NCALL(node); Node** nodes = SCANENV_MEM_NODES(env); -#ifdef USE_NAMED_GROUP - n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); -#else - n = -1; -#endif - if (n <= 0) { - /* name not found, check group number. (?*ddd) */ - p = cn->name; - num = onig_scan_unsigned_number(&p, cn->name_end, env->enc); - if (num <= 0 || p != cn->name_end) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } + if (cn->group_num != 0) { + int gnum = cn->group_num; + #ifdef USE_NAMED_GROUP if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -3041,38 +3016,53 @@ setup_subexp_call(Node* node, ScanEnv* env) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } #endif - if (num > env->num_mem) { + if (gnum > env->num_mem) { onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_GROUP_REFERENCE; } - cn->ref_num = num; - goto set_call_attr; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->ref_num = refs[0]; + +#ifdef USE_NAMED_GROUP set_call_attr: - cn->target = nodes[cn->ref_num]; +#endif + cn->target = nodes[cn->group_num]; if (IS_NULL(cn->target)) { onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_NAME_REFERENCE; } - SET_EFFECT_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); cn->unset_addr_list = env->unset_addr_list; } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: @@ -3100,30 +3090,29 @@ setup_subexp_call(Node* node, ScanEnv* env) static int divide_look_behind_alternatives(Node* node) { - Node tmp_node; Node *head, *np, *insert_node; - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); int anc_type = an->type; head = an->target; - np = NCONS(head).left; - tmp_node = *node; *node = *head; *head = tmp_node; - NCONS(node).left = head; - NANCHOR(head).target = np; + np = NCAR(head); + swap_node(node, head); + NCAR(node) = head; + NANCHOR(head)->target = np; np = node; - while ((np = NCONS(np).right) != NULL_NODE) { + while ((np = NCDR(np)) != NULL_NODE) { insert_node = onig_node_new_anchor(anc_type); - CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY); - NANCHOR(insert_node).target = NCONS(np).left; - NCONS(np).left = insert_node; + CHECK_NULL_RETURN_MEMERR(insert_node); + NANCHOR(insert_node)->target = NCAR(np); + NCAR(np) = insert_node; } if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { np = node; do { - np->type = N_LIST; /* alt -> list */ - } while ((np = NCONS(np).right) != NULL_NODE); + SET_NTYPE(np, NT_LIST); /* alt -> list */ + } while ((np = NCDR(np)) != NULL_NODE); } return 0; } @@ -3132,7 +3121,7 @@ static int setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) { int r, len; - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); r = get_char_length_tree(an->target, reg, &len); if (r == 0) @@ -3156,11 +3145,15 @@ next_setup(Node* node, Node* next_node, regex_t* reg) retry: type = NTYPE(node); - if (type == N_QUANTIFIER) { - QuantifierNode* qn = &(NQUANTIFIER(node)); + if (type == NT_QTFR) { + QtfrNode* qn = NQTFR(node); if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QUANTIFIER_PEEK_NEXT - qn->next_head_exact = get_head_value_node(next_node, 1, reg); +#ifdef USE_QTFR_PEEK_NEXT + Node* n = get_head_value_node(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + qn->next_head_exact = n; + } #endif /* automatic posseivation a*b ==> (?>a*)b */ if (qn->lower <= 1) { @@ -3171,20 +3164,20 @@ next_setup(Node* node, Node* next_node, regex_t* reg) if (IS_NOT_NULL(x)) { y = get_head_value_node(next_node, 0, reg); if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK); - CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY); - SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); swap_node(node, en); - NEFFECT(node).target = en; + NENCLOSE(node)->target = en; } } } } } } - else if (type == N_EFFECT) { - EffectNode* en = &(NEFFECT(node)); - if (en->type == EFFECT_MEMORY) { + else if (type == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { node = en->target; goto retry; } @@ -3194,100 +3187,318 @@ next_setup(Node* node, Node* next_node, regex_t* reg) static int -divide_ambig_string_node_sub(regex_t* reg, int prev_ambig, - UChar* prev_start, UChar* prev, - UChar* end, Node*** tailp, Node** root) +update_string_node_case_fold(regex_t* reg, Node *node) { - UChar *tmp, *wp; - Node* snode; + UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *sbuf, *ebuf, *sp; + int r, i, len, sbuf_size; + StrNode* sn = NSTR(node); + + end = sn->end; + sbuf_size = (end - sn->s) * 2; + sbuf = (UChar* )xmalloc(sbuf_size); + CHECK_NULL_RETURN_MEMERR(sbuf); + ebuf = sbuf + sbuf_size; - if (prev_ambig != 0) { - tmp = prev_start; - wp = prev_start; - while (tmp < prev) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); + sp = sbuf; + p = sn->s; + while (p < end) { + len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); + q = buf; + for (i = 0; i < len; i++) { + if (sp >= ebuf) { + sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); + CHECK_NULL_RETURN_MEMERR(sbuf); + sp = sbuf + sbuf_size; + sbuf_size *= 2; + ebuf = sbuf + sbuf_size; + } + + *sp++ = buf[i]; } - snode = onig_node_new_str(prev_start, wp); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - NSTRING_SET_AMBIG(snode); - if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); } - else { - snode = onig_node_new_str(prev_start, prev); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + + r = onig_node_str_set(node, sbuf, sp); + if (r != 0) { + xfree(sbuf); + return r; + } + + xfree(sbuf); + return 0; +} + +static int +expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, + regex_t* reg) +{ + int r; + Node *node; + + node = onig_node_new_str(s, end); + if (IS_NULL(node)) return ONIGERR_MEMORY; + + r = update_string_node_case_fold(reg, node); + if (r != 0) { + onig_node_free(node); + return r; + } + + NSTRING_SET_AMBIG(node); + NSTRING_SET_DONT_GET_OPT_INFO(node); + *rnode = node; + return 0; +} + +static int +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], + UChar *p, int slen, UChar *end, + regex_t* reg, Node **rnode) +{ + int r, i, j, len, varlen; + Node *anode, *var_anode, *snode, *xnode, *an; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + + *rnode = var_anode = NULL_NODE; + + varlen = 0; + for (i = 0; i < item_num; i++) { + if (items[i].byte_len != slen) { + varlen = 1; + break; + } } - if (*tailp == (Node** )0) { - *root = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY); - *tailp = &(NCONS(*root).right); + if (varlen != 0) { + *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(var_anode)) return ONIGERR_MEMORY; + + xnode = onig_node_new_list(NULL, NULL); + if (IS_NULL(xnode)) goto mem_err; + NCAR(var_anode) = xnode; + + anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) goto mem_err; + NCAR(xnode) = anode; } else { - **tailp = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY); - *tailp = &(NCONS(**tailp).right); + *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) return ONIGERR_MEMORY; } - return 0; + snode = onig_node_new_str(p, p + slen); + if (IS_NULL(snode)) goto mem_err; + + NCAR(anode) = snode; + + for (i = 0; i < item_num; i++) { + snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + + for (j = 0; j < items[i].code_len; j++) { + len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); + if (len < 0) { + r = len; + goto mem_err2; + } + + r = onig_node_str_cat(snode, buf, buf + len); + if (r != 0) goto mem_err2; + } + + an = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(an)) { + goto mem_err2; + } + + if (items[i].byte_len != slen) { + Node *rem; + UChar *q = p + items[i].byte_len; + + if (q < end) { + r = expand_case_fold_make_rem_string(&rem, q, end, reg); + if (r != 0) { + onig_node_free(an); + goto mem_err2; + } + + xnode = onig_node_list_add(NULL_NODE, snode); + if (IS_NULL(xnode)) { + onig_node_free(an); + onig_node_free(rem); + goto mem_err2; + } + if (IS_NULL(onig_node_list_add(xnode, rem))) { + onig_node_free(an); + onig_node_free(xnode); + onig_node_free(rem); + goto mem_err; + } + + NCAR(an) = xnode; + } + else { + NCAR(an) = snode; + } + + NCDR(var_anode) = an; + var_anode = an; + } + else { + NCAR(an) = snode; + NCDR(anode) = an; + anode = an; + } + } + + return varlen; + + mem_err2: + onig_node_free(snode); + + mem_err: + onig_node_free(*rnode); + + return ONIGERR_MEMORY; } static int -divide_ambig_string_node(Node* node, regex_t* reg) +expand_case_fold_string(Node* node, regex_t* reg) { - StrNode* sn = &NSTRING(node); - int ambig, prev_ambig; - UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp; - Node *root = NULL_NODE; - Node **tailp = (Node** )0; - int r; +#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 - start = prev_start = p = sn->s; - end = sn->end; - if (p >= end) return 0; + int r, n, len, alt_num; + UChar *start, *end, *p; + Node *top_root, *root, *snode, *prev_node; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + StrNode* sn = NSTR(node); - prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end); + if (NSTRING_IS_AMBIG(node)) return 0; + start = sn->s; + end = sn->end; + if (start >= end) return 0; + + r = 0; + top_root = root = prev_node = snode = NULL_NODE; + alt_num = 1; + p = start; while (p < end) { - prev = p; - if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, - reg->ambig_flag, &p, end))) { + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, + p, end, items); + if (n < 0) { + r = n; + goto err; + } + + len = enclen(reg->enc, p); + + if (n == 0) { + if (IS_NULL(snode)) { + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } - r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev, - end, &tailp, &root); - if (r != 0) return r; + prev_node = snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } + } - prev_ambig = ambig; - prev_start = prev; + r = onig_node_str_cat(snode, p, p + len); + if (r != 0) goto err; } - } + else { + alt_num *= (n + 1); + if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; + + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } - if (prev_start == start) { - if (prev_ambig != 0) { - NSTRING_SET_AMBIG(node); - tmp = start; - wp = start; - while (tmp < end) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); + r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); + if (r < 0) goto mem_err; + if (r == 1) { + if (IS_NULL(root)) { + top_root = prev_node; + } + else { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + + root = NCAR(prev_node); + } + else { /* r == 0 */ + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } } - if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node); - sn->end = wp; + + snode = NULL_NODE; } + + p += len; } - else { - r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end, - end, &tailp, &root); - if (r != 0) return r; - swap_node(node, root); - onig_node_str_clear(root); /* should be after swap! */ - onig_node_free(root); /* free original string node */ + if (p < end) { + Node *srem; + + r = expand_case_fold_make_rem_string(&srem, p, end, reg); + if (r != 0) goto mem_err; + + if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(srem); + onig_node_free(prev_node); + goto mem_err; + } + } + + if (IS_NULL(root)) { + prev_node = srem; + } + else { + if (IS_NULL(onig_node_list_add(root, srem))) { + onig_node_free(srem); + goto mem_err; + } + } } + /* ending */ + top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); + swap_node(node, top_root); + onig_node_free(top_root); return 0; + + mem_err: + r = ONIGERR_MEMORY; + + err: + onig_node_free(top_root); + return r; } + #ifdef USE_COMBINATION_EXPLOSION_CHECK #define CEC_THRES_NUM_BIG_REPEAT 512 @@ -3305,31 +3516,31 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node* prev = NULL_NODE; do { - r = setup_comb_exp_check(NCONS(node).left, r, env); - prev = NCONS(node).left; - } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_comb_exp_check(NCAR(node), r, env); + prev = NCAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_ALT: + case NT_ALT: { int ret; do { - ret = setup_comb_exp_check(NCONS(node).left, state, env); + ret = setup_comb_exp_check(NCAR(node), state, env); r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right)); + } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_QUANTIFIER: + case NT_QTFR: { int child_state = state; int add_state = 0; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); Node* target = qn->target; int var_num; @@ -3340,11 +3551,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == N_EFFECT) { - EffectNode* en = &(NEFFECT(qn->target)); - if (en->type == EFFECT_MEMORY) { - if (NTYPE(en->target) == N_QUANTIFIER) { - QuantifierNode* q = &(NQUANTIFIER(en->target)); + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); if (IS_REPEAT_INFINITE(q->upper) && q->greedy == qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); @@ -3390,12 +3601,12 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: { if (env->curr_max_regnum < en->regnum) env->curr_max_regnum = en->regnum; @@ -3412,11 +3623,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&(NCALL(node)))) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) env->has_recursion = 1; else - r = setup_comb_exp_check(NCALL(node).target, state, env); + r = setup_comb_exp_check(NCALL(node)->target, state, env); break; #endif @@ -3449,68 +3660,68 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { Node* prev = NULL_NODE; do { - r = setup_tree(NCONS(node).left, reg, state, env); + r = setup_tree(NCAR(node), reg, state, env); if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCONS(node).left, reg); + r = next_setup(prev, NCAR(node), reg); } - prev = NCONS(node).left; - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + prev = NCAR(node); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); } break; - case N_ALT: + case NT_ALT: do { - r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); + r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); break; - case N_CCLASS: + case NT_CCLASS: break; - case N_STRING: + case NT_STR: if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = divide_ambig_string_node(node, reg); + r = expand_case_fold_string(node, reg); } break; - case N_CTYPE: - case N_ANYCHAR: + case NT_CTYPE: + case NT_CANY: break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: break; #endif - case N_BACKREF: + case NT_BREF: { int i; int* p; Node** nodes = SCANENV_MEM_NODES(env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); } #endif - SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } break; - case N_QUANTIFIER: + case NT_QTFR: { OnigDistance d; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); Node* target = qn->target; if ((state & IN_REPEAT) != 0) { @@ -3522,7 +3733,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r) break; if (d == 0) { qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT r = quantifiers_memory_node_info(target); if (r < 0) break; if (r > 0) { @@ -3535,7 +3746,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* ()* ==> ()?, ()+ ==> () */ qn->upper = 1; if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == N_STRING) { + if (NTYPE(target) == NT_STR) { qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ } } @@ -3551,29 +3762,29 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == N_STRING) { + if (NTYPE(target) == NT_STR) { if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { int len = NSTRING_LEN(target); - StrNode* sn = &(NSTRING(target)); + StrNode* sn = NSTR(target); if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTRING(target).flag); + onig_node_conv_to_str_node(node, NSTR(target)->flag); for (i = 0; i < n; i++) { r = onig_node_str_cat(node, sn->s, sn->end); if (r) break; } onig_node_free(target); - break; /* break case N_QUANTIFIER: */ + break; /* break case NT_QTFR: */ } } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == N_QUANTIFIER) { - QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); if (IS_NOT_NULL(tqn->head_exact)) { qn->head_exact = tqn->head_exact; tqn->head_exact = NULL; @@ -3587,39 +3798,39 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType options = reg->options; - reg->options = NEFFECT(node).option; - r = setup_tree(NEFFECT(node).target, reg, state, env); + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); reg->options = options; } break; - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */ + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } r = setup_tree(en->target, reg, state, env); break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: { Node* target = en->target; r = setup_tree(target, reg, state, env); - if (NTYPE(target) == N_QUANTIFIER) { - QuantifierNode* tqn = &(NQUANTIFIER(target)); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ int qtype = NTYPE(tqn->target); if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); } } } @@ -3628,9 +3839,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case N_ANCHOR: + case NT_ANCHOR: { - AnchorNode* an = &(NANCHOR(node)); + AnchorNode* an = NANCHOR(node); switch (an->type) { case ANCHOR_PREC_READ: @@ -3642,11 +3853,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ - ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ - N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL ) + ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ + BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) -#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) -#define ALLOWED_EFFECT_IN_LB_NOT 0 +#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT 0 #define ALLOWED_ANCHOR_IN_LB \ ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) @@ -3656,7 +3867,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCHOR_LOOK_BEHIND: { r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB); + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); @@ -3668,7 +3879,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ANCHOR_LOOK_BEHIND_NOT: { r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; r = setup_look_behind(node, reg, env); @@ -3689,7 +3900,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moor search */ static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[], int** int_skip) { int i, len; @@ -3722,11 +3933,11 @@ typedef struct { } MinMaxLen; typedef struct { - MinMaxLen mmd; - OnigEncoding enc; - OnigOptionType options; - OnigAmbigType ambig_flag; - ScanEnv* scan_env; + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigCaseFoldType case_fold_flag; + ScanEnv* scan_env; } OptEnv; typedef struct { @@ -3778,7 +3989,7 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) { + if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else @@ -3810,7 +4021,7 @@ distance_value(MinMaxLen* mm) if (mm->max == ONIG_INFINITE_DISTANCE) return 0; d = mm->max - mm->min; - if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) + if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) /* return dist_vals[d] * 16 / (mm->min + 12); */ return (int )dist_vals[d]; else @@ -4003,7 +4214,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) p = add->s; end = p + add->len; for (i = to->len; p < end; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4018,14 +4229,14 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) } static void -concat_opt_exact_info_str(OptExactInfo* to, - UChar* s, UChar* end, int raw, OnigEncoding enc) +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) { int i, j, len; UChar *p; for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enc_len(enc, p); + len = enclen(enc, p); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4051,7 +4262,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) for (i = 0; i < to->len && i < add->len; ) { if (to->s[i] != add->s[i]) break; - len = enc_len(env->enc, to->s + i); + len = enclen(env->enc, to->s + i); for (j = 1; j < len; j++) { if (to->s[i+j] != add->s[i+j]) break; @@ -4146,29 +4357,23 @@ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) static int add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigAmbigType ambig_flag) + OnigEncoding enc, OnigCaseFoldType case_fold_flag) { - int i, n, len; - UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - OnigCodePoint code; - const OnigPairAmbigCodes* pccs; - OnigAmbigType amb; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int i, n; add_char_opt_map_info(map, p[0], enc); - code = ONIGENC_MBC_TO_CODE(enc, p, end); - for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { - if ((amb & ambig_flag) == 0) continue; + case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + if (n < 0) return n; - n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs); - for (i = 0; i < n; i++) { - if (pccs[i].from == code) { - len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf); - if (len < 0) return len; - add_char_opt_map_info(map, buf[0], enc); - } - } + for (i = 0; i < n; i++) { + ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); + add_char_opt_map_info(map, buf[0], enc); } + return 0; } @@ -4341,7 +4546,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) type = NTYPE(node); switch (type) { - case N_LIST: + case NT_LIST: { OptEnv nenv; NodeOptInfo nopt; @@ -4349,33 +4554,33 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_opt_env(&nenv, env); do { - r = optimize_node_left(NCONS(nd).left, &nopt, &nenv); + r = optimize_node_left(NCAR(nd), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); concat_left_node_opt_info(env->enc, opt, &nopt); } - } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right)); + } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); } break; - case N_ALT: + case NT_ALT: { NodeOptInfo nopt; Node* nd = node; do { - r = optimize_node_left(NCONS(nd).left, &nopt, env); + r = optimize_node_left(NCAR(nd), &nopt, env); if (r == 0) { if (nd == node) copy_node_opt_info(opt, &nopt); else alt_merge_node_opt_info(opt, &nopt, env); } - } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right)); + } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); } break; - case N_STRING: + case NT_STR: { - StrNode* sn = &(NSTRING(node)); + StrNode* sn = NSTR(node); int slen = sn->end - sn->s; int is_raw = NSTRING_IS_RAW(node); @@ -4388,25 +4593,26 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) set_mml(&opt->len, slen, slen); } else { - int n, max; + int max; - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 1; + if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + int n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->case_fold_flag); + if (r != 0) break; + } - if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->ambig_flag); - if (r != 0) break; + max = slen; } - if (NSTRING_IS_AMBIG_REDUCE(node)) { - n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; - } - else { - max = slen; - } set_mml(&opt->len, slen, max); } @@ -4415,14 +4621,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_CCLASS: + case NT_CCLASS: { int i, z; - CClassNode* cc = &(NCCLASS(node)); + CClassNode* cc = NCCLASS(node); /* no need to check ignore case. (setted in setup_tree()) */ - if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) { + if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -4431,7 +4637,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); - if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) { + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { add_char_opt_map_info(&opt->map, (UChar )i, env->enc); } } @@ -4440,7 +4646,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_CTYPE: + case NT_CTYPE: { int i, min, max; @@ -4449,21 +4655,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (max == 1) { min = 1; - switch (NCTYPE(node).type) { - case CTYPE_NOT_WORD: - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - break; - - case CTYPE_WORD: - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } break; } } @@ -4474,7 +4681,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_ANYCHAR: + case NT_CANY: { OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); @@ -4482,22 +4689,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_ANCHOR: - switch (NANCHOR(node).type) { + case NT_ANCHOR: + switch (NANCHOR(node)->type) { case ANCHOR_BEGIN_BUF: case ANCHOR_BEGIN_POSITION: case ANCHOR_BEGIN_LINE: case ANCHOR_END_BUF: case ANCHOR_SEMI_END_BUF: case ANCHOR_END_LINE: - add_opt_anc_info(&opt->anc, NANCHOR(node).type); + add_opt_anc_info(&opt->anc, NANCHOR(node)->type); break; case ANCHOR_PREC_READ: { NodeOptInfo nopt; - r = optimize_node_left(NANCHOR(node).target, &nopt, env); + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); if (r == 0) { if (nopt.exb.len > 0) copy_opt_exact_info(&opt->expr, &nopt.exb); @@ -4519,13 +4726,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_BACKREF: + case NT_BREF: { int i; int* backs; OnigDistance min, max, tmin, tmax; Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); if (br->state & NST_RECURSION) { set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); @@ -4549,31 +4756,31 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; #ifdef USE_SUBEXP_CALL - case N_CALL: - if (IS_CALL_RECURSION(&(NCALL(node)))) + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); else { OnigOptionType save = env->options; - env->options = NEFFECT(NCALL(node).target).option; - r = optimize_node_left(NCALL(node).target, opt, env); + env->options = NENCLOSE(NCALL(node)->target)->option; + r = optimize_node_left(NCALL(node)->target, opt, env); env->options = save; } break; #endif - case N_QUANTIFIER: + case NT_QTFR: { int i; OnigDistance min, max; NodeOptInfo nopt; - QuantifierNode* qn = &(NQUANTIFIER(node)); + QtfrNode* qn = NQTFR(node); r = optimize_node_left(qn->target, &nopt, env); if (r) break; if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && - NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { + NTYPE(qn->target) == NT_CANY && qn->greedy) { if (IS_MULTILINE(env->options)) add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else @@ -4585,7 +4792,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_node_opt_info(opt, &nopt); if (nopt.exb.len > 0) { if (nopt.exb.reach_end) { - for (i = 2; i < qn->lower && + for (i = 2; i <= qn->lower && ! is_full_opt_exact_info(&opt->exb); i++) { concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); } @@ -4614,12 +4821,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case N_EFFECT: + case NT_ENCLOSE: { - EffectNode* en = &(NEFFECT(node)); + EncloseNode* en = NENCLOSE(node); switch (en->type) { - case EFFECT_OPTION: + case ENCLOSE_OPTION: { OnigOptionType save = env->options; @@ -4629,7 +4836,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case EFFECT_MEMORY: + case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { @@ -4637,8 +4844,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = 0; max = ONIG_INFINITE_DISTANCE; - if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len; - if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; set_mml(&opt->len, min, max); } else @@ -4653,7 +4860,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: r = optimize_node_left(en->target, opt, env); break; } @@ -4681,7 +4888,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case) { reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + CHECK_NULL_RETURN_MEMERR(reg->exact); xmemcpy(reg->exact, e->s, e->len); reg->exact_end = reg->exact + e->len; reg->optimize = ONIG_OPTIMIZE_EXACT_IC; @@ -4689,8 +4896,8 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { int allow_reverse; - reg->exact = k_strdup(e->s, e->s + e->len); - CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); + reg->exact = str_dup(e->s, e->s + e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); reg->exact_end = reg->exact + e->len; allow_reverse = @@ -4755,9 +4962,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) NodeOptInfo opt; OptEnv env; - env.enc = reg->enc; - env.options = reg->options; - env.ambig_flag = reg->ambig_flag; + env.enc = reg->enc; + env.options = reg->options; + env.case_fold_flag = reg->case_fold_flag; env.scan_env = scan_env; clear_mml(&env.mmd); @@ -4839,7 +5046,7 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, fputc((int )code, fp); } - p += enc_len(enc, p); + p += enclen(enc, p); } } else { @@ -4971,19 +5178,21 @@ print_optimize_info(FILE* f, regex_t* reg) #endif /* ONIG_DEBUG */ -static void +extern void onig_free_body(regex_t* reg) { - if (IS_NOT_NULL(reg->p)) xfree(reg->p); - if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); - if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + if (IS_NOT_NULL(reg)) { + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); #ifdef USE_NAMED_GROUP - onig_names_free(reg); + onig_names_free(reg); #endif + } } extern void @@ -5043,84 +5252,6 @@ onig_chain_reduce(regex_t* reg) } } -#if 0 -extern int -onig_clone(regex_t** to, regex_t* from) -{ - int r, size; - regex_t* reg; - -#ifdef USE_MULTI_THREAD_SYSTEM - if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(from); - if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(from); - ONIG_STATE_INC(from); - } - } - else { - int n = 0; - while (ONIG_STATE(from) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - ONIG_STATE_INC(from); - } -#endif /* USE_MULTI_THREAD_SYSTEM */ - - r = onig_alloc_init(®, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, - from->enc, ONIG_SYNTAX_DEFAULT); - if (r != 0) { - ONIG_STATE_DEC(from); - return r; - } - - xmemcpy(reg, from, sizeof(onig_t)); - reg->chain = (regex_t* )NULL; - reg->state = ONIG_STATE_NORMAL; - - if (from->p) { - reg->p = (UChar* )xmalloc(reg->alloc); - if (IS_NULL(reg->p)) goto mem_error; - xmemcpy(reg->p, from->p, reg->alloc); - } - - if (from->exact) { - reg->exact = (UChar* )xmalloc(from->exact_end - from->exact); - if (IS_NULL(reg->exact)) goto mem_error; - reg->exact_end = reg->exact + (from->exact_end - from->exact); - xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact); - } - - if (from->int_map) { - size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; - reg->int_map = (int* )xmalloc(size); - if (IS_NULL(reg->int_map)) goto mem_error; - xmemcpy(reg->int_map, from->int_map, size); - } - - if (from->int_map_backward) { - size = sizeof(int) * ONIG_CHAR_TABLE_SIZE; - reg->int_map_backward = (int* )xmalloc(size); - if (IS_NULL(reg->int_map_backward)) goto mem_error; - xmemcpy(reg->int_map_backward, from->int_map_backward, size); - } - -#ifdef USE_NAMED_GROUP - reg->name_table = names_clone(from); /* names_clone is not implemented */ -#endif - - ONIG_STATE_DEC(from); - *to = reg; - return 0; - - mem_error: - ONIG_STATE_DEC(from); - return ONIGERR_MEMORY; -} -#endif - #ifdef ONIG_DEBUG static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); #endif @@ -5141,6 +5272,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, UnsetAddrList uslist; #endif + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + reg->state = ONIG_STATE_COMPILING; #ifdef ONIG_DEBUG @@ -5182,10 +5315,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } #endif -#ifdef ONIG_DEBUG_PARSE_TREE - print_tree(stderr, root); -#endif - #ifdef USE_SUBEXP_CALL if (scan_env.num_call > 0) { r = unset_addr_list_init(&uslist, scan_env.num_call); @@ -5207,6 +5336,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = setup_tree(root, reg, 0, &scan_env); if (r != 0) goto err_unset; +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + reg->capture_history = scan_env.capture_history; reg->bt_mem_start = scan_env.bt_mem_start; reg->bt_mem_start |= reg->capture_history; @@ -5308,7 +5441,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } } - if (IS_NOT_NULL(root)) onig_node_free(root); + onig_node_free(root); if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) xfree(scan_env.mem_nodes_dynamic); return r; @@ -5338,12 +5471,16 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, static int onig_inited = 0; extern int -onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, - OnigEncoding enc, OnigSyntaxType* syntax) +onig_reg_init(regex_t* reg, OnigOptionType option, + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, OnigSyntaxType* syntax) { if (! onig_inited) onig_init(); + if (IS_NULL(reg)) + return ONIGERR_INVALID_ARGUMENT; + if (ONIGENC_IS_UNDEF(enc)) return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; @@ -5352,9 +5489,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; } - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - (*reg)->state = ONIG_STATE_MODIFY; + (reg)->state = ONIG_STATE_MODIFY; if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { option |= syntax->options; @@ -5363,24 +5498,36 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, else option |= syntax->options; - (*reg)->enc = enc; - (*reg)->options = option; - (*reg)->syntax = syntax; - (*reg)->optimize = 0; - (*reg)->exact = (UChar* )NULL; - (*reg)->int_map = (int* )NULL; - (*reg)->int_map_backward = (int* )NULL; - (*reg)->chain = (regex_t* )NULL; + (reg)->enc = enc; + (reg)->options = option; + (reg)->syntax = syntax; + (reg)->optimize = 0; + (reg)->exact = (UChar* )NULL; + (reg)->int_map = (int* )NULL; + (reg)->int_map_backward = (int* )NULL; + (reg)->chain = (regex_t* )NULL; + + (reg)->p = (UChar* )NULL; + (reg)->alloc = 0; + (reg)->used = 0; + (reg)->name_table = (void* )NULL; + + (reg)->case_fold_flag = case_fold_flag; + return 0; +} - (*reg)->p = (UChar* )NULL; - (*reg)->alloc = 0; - (*reg)->used = 0; - (*reg)->name_table = (void* )NULL; +extern int +onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax, OnigErrorInfo* einfo) +{ + int r; - (*reg)->ambig_flag = ambig_flag; - (*reg)->ambig_flag &= ONIGENC_SUPPORT_AMBIG_FLAG(enc); + r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) return r; - return 0; + r = onig_compile(reg, pattern, pattern_end, einfo); + return r; } extern int @@ -5390,33 +5537,35 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, { int r; - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; - r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT, - enc, syntax); - if (r) return r; + r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) goto err; r = onig_compile(*reg, pattern, pattern_end, einfo); if (r) { + err: onig_free(*reg); *reg = NULL; } return r; } + extern int onig_init(void) { if (onig_inited != 0) return 0; - onig_inited = 1; - THREAD_SYSTEM_INIT; THREAD_ATOMIC_START; + onig_inited = 1; + onigenc_init(); - onigenc_set_default_caseconv_table((UChar* )0); + /* onigenc_set_default_caseconv_table((UChar* )0); */ #ifdef ONIG_DEBUG_STATISTICS onig_statistics_init(); @@ -5430,8 +5579,6 @@ onig_init(void) extern int onig_end(void) { - extern int onig_free_shared_cclass_table(void); - THREAD_ATOMIC_START; #ifdef ONIG_DEBUG_STATISTICS @@ -5442,7 +5589,7 @@ onig_end(void) onig_free_shared_cclass_table(); #endif -#ifdef USE_RECYCLE_NODE +#ifdef USE_PARSE_TREE_NODE_RECYCLE onig_free_node_list(); #endif @@ -5453,6 +5600,64 @@ onig_end(void) return 0; } +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +extern int +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); + } + + if (IS_NCCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return onig_is_code_in_cc_len(len, code, cc); +} + #ifdef ONIG_DEBUG @@ -5514,7 +5719,7 @@ OnigOpInfoType OnigOpInfo[] = { { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, @@ -5706,7 +5911,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, break; case OP_EXACT1_IC: - len = enc_len(enc, bp); + len = enclen(enc, bp); p_string(f, len, bp); bp += len; break; @@ -5781,7 +5986,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, } break; - case OP_BACKREF_AT_LEVEL: + case OP_BACKREF_WITH_LEVEL: { OnigOptionType option; LengthType level; @@ -5889,27 +6094,27 @@ print_indent_tree(FILE* f, Node* node, int indent) type = NTYPE(node); switch (type) { - case N_LIST: - case N_ALT: - if (NTYPE(node) == N_LIST) + case NT_LIST: + case NT_ALT: + if (NTYPE(node) == NT_LIST) fprintf(f, "<list:%x>\n", (int )node); else fprintf(f, "<alt:%x>\n", (int )node); - print_indent_tree(f, NCONS(node).left, indent + add); - while (IS_NOT_NULL(node = NCONS(node).right)) { + print_indent_tree(f, NCAR(node), indent + add); + while (IS_NOT_NULL(node = NCDR(node))) { if (NTYPE(node) != type) { fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); exit(0); } - print_indent_tree(f, NCONS(node).left, indent + add); + print_indent_tree(f, NCAR(node), indent + add); } break; - case N_STRING: + case NT_STR: fprintf(f, "<string%s:%x>", (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); - for (p = NSTRING(node).s; p < NSTRING(node).end; p++) { + for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); else { @@ -5918,11 +6123,11 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_CCLASS: + case NT_CCLASS: fprintf(f, "<cclass:%x>", (int )node); - if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node).mbuf) { - BBuf* bbuf = NCCLASS(node).mbuf; + if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node)->mbuf) { + BBuf* bbuf = NCCLASS(node)->mbuf; for (i = 0; i < bbuf->used; i++) { if (i > 0) fprintf(f, ","); fprintf(f, "%0x", bbuf->p[i]); @@ -5930,24 +6135,29 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_CTYPE: + case NT_CTYPE: fprintf(f, "<ctype:%x> ", (int )node); - switch (NCTYPE(node).type) { - case CTYPE_WORD: fputs("word", f); break; - case CTYPE_NOT_WORD: fputs("not word", f); break; + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) + fputs("not word", f); + else + fputs("word", f); + break; + default: fprintf(f, "ERROR: undefined ctype.\n"); exit(0); } break; - case N_ANYCHAR: + case NT_CANY: fprintf(f, "<anychar:%x>", (int )node); break; - case N_ANCHOR: + case NT_ANCHOR: fprintf(f, "<anchor:%x> ", (int )node); - switch (NANCHOR(node).type) { + switch (NANCHOR(node)->type) { case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; case ANCHOR_END_BUF: fputs("end buf", f); break; case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; @@ -5972,10 +6182,10 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case N_BACKREF: + case NT_BREF: { int* p; - BackrefNode* br = &(NBACKREF(node)); + BRefNode* br = NBREF(node); p = BACKREFS_P(br); fprintf(f, "<backref:%x>", (int )node); for (i = 0; i < br->back_num; i++) { @@ -5986,33 +6196,33 @@ print_indent_tree(FILE* f, Node* node, int indent) break; #ifdef USE_SUBEXP_CALL - case N_CALL: + case NT_CALL: { - CallNode* cn = &(NCALL(node)); + CallNode* cn = NCALL(node); fprintf(f, "<call:%x>", (int )node); p_string(f, cn->name_end - cn->name, cn->name); } break; #endif - case N_QUANTIFIER: + case NT_QTFR: fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node, - NQUANTIFIER(node).lower, NQUANTIFIER(node).upper, - (NQUANTIFIER(node).greedy ? "" : "?")); - print_indent_tree(f, NQUANTIFIER(node).target, indent + add); + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? "" : "?")); + print_indent_tree(f, NQTFR(node)->target, indent + add); break; - case N_EFFECT: - fprintf(f, "<effect:%x> ", (int )node); - switch (NEFFECT(node).type) { - case EFFECT_OPTION: - fprintf(f, "option:%d\n", NEFFECT(node).option); - print_indent_tree(f, NEFFECT(node).target, indent + add); + case NT_ENCLOSE: + fprintf(f, "<enclose:%x> ", (int )node); + switch (NENCLOSE(node)->type) { + case ENCLOSE_OPTION: + fprintf(f, "option:%d\n", NENCLOSE(node)->option); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); break; - case EFFECT_MEMORY: - fprintf(f, "memory:%d", NEFFECT(node).regnum); + case ENCLOSE_MEMORY: + fprintf(f, "memory:%d", NENCLOSE(node)->regnum); break; - case EFFECT_STOP_BACKTRACK: + case ENCLOSE_STOP_BACKTRACK: fprintf(f, "stop-bt"); break; @@ -6020,7 +6230,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } fprintf(f, "\n"); - print_indent_tree(f, NEFFECT(node).target, indent + add); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); break; default: @@ -6028,8 +6238,8 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } - if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER && - type != N_EFFECT) + if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && + type != NT_ENCLOSE) fprintf(f, "\n"); fflush(f); } |