diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-08-04 04:14:38 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-08-04 04:14:38 +0000 |
commit | 60ab77a16a89b0461205a3a6395009266a3ba279 (patch) | |
tree | 47b35d8e124d9e4bc4b24c0d95702f525d5729e0 /regparse.c | |
parent | 38107457a387811b1dae6bba6c2904385f52722e (diff) | |
download | bundler-60ab77a16a89b0461205a3a6395009266a3ba279.tar.gz |
Warn about duplicated characters in a regexp character class. [ruby-core:24593]
* include/ruby/oniguruma.h (ONIG_SYN_WARN_CC_DUP): defined.
* regparse.h (ScanEnv): add warnings_flag.
* regparse.c (CC_DUP_WARN): defined to warn about duplicated characters
in a regexp character class. [ruby-core:24593]
(add_code_range_to_buf): add CC_DUP_WARN.
(next_state_val): add CC_DUP_WARN.
(OnigSyntaxRuby): add ONIG_SYN_WARN_CC_DUP.
(SET_ALL_MULTI_BYTE_RANGE): add env to arguments.
(add_code_range): ditto.
(add_code_range_to_buf): ditto.
(not_code_range_buf): ditto.
(or_code_range_buf): ditto.
(and_code_range1): ditto.
(and_code_range_buf): ditto.
(and_cclass): ditto.
(or_cclass): ditto.
(add_ctype_to_cc_by_range): ditto.
(add_ctype_to_cc): ditto.
(parse_char_class): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@24387 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regparse.c')
-rw-r--r-- | regparse.c | 130 |
1 files changed, 79 insertions, 51 deletions
diff --git a/regparse.c b/regparse.c index 2a8f984372..435e4de59d 100644 --- a/regparse.c +++ b/regparse.c @@ -57,6 +57,7 @@ const OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_CC_DUP | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) , ONIG_OPTION_NONE , @@ -96,6 +97,8 @@ extern void onig_set_verb_warn_func(OnigWarnFunc f) onig_verb_warn = f; } +static void CC_DUP_WARN(ScanEnv *env); + static void bbuf_free(BBuf* bbuf) { @@ -129,7 +132,7 @@ bbuf_clone(BBuf** rto, BBuf* from) (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ - add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ @@ -139,6 +142,11 @@ bbuf_clone(BBuf** rto, BBuf* from) } while (0) +#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \ + if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \ + BS_ROOM(bs, pos) |= BS_BIT(pos); \ +} while (0) + #define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ @@ -150,11 +158,11 @@ bbuf_clone(BBuf** rto, BBuf* from) } while (0) static void -bitset_set_range(BitSetRef bs, int from, int to) +bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to) { int i; for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { - BITSET_SET_BIT(bs, i); + BITSET_SET_BIT_CHKDUP(bs, i); } } @@ -1689,7 +1697,7 @@ new_code_range(BBuf** pbuf) } static int -add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) +add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) { int r, inc_n, pos; int low, high, bound, x; @@ -1736,8 +1744,10 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) if (inc_n != 1) { if (from > data[low*2]) from = data[low*2]; + else CC_DUP_WARN(env); if (to < 
data[(high - 1)*2 + 1]) to = data[(high - 1)*2 + 1]; + else CC_DUP_WARN(env); } if (inc_n != 0 && (OnigCodePoint )high < n) { @@ -1773,11 +1783,11 @@ add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - return add_code_range_to_buf(pbuf, from, to); + return add_code_range_to_buf(pbuf, env, from, to); } static int -not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) +not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env) { int r, i, n; OnigCodePoint pre, from, *data, to = 0; @@ -1799,14 +1809,14 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) from = data[i*2]; to = data[i*2+1]; if (pre <= from - 1) { - r = add_code_range_to_buf(pbuf, pre, from - 1); + r = add_code_range_to_buf(pbuf, env, pre, from - 1); if (r != 0) return r; } if (to == ~((OnigCodePoint )0)) break; pre = to + 1; } if (to < ~((OnigCodePoint )0)) { - r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); + r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0)); } return r; } @@ -1820,7 +1830,7 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) static int or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, - BBuf* bbuf2, int not2, BBuf** pbuf) + BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) { int r; OnigCodePoint i, n1, *data1; @@ -1846,7 +1856,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, return bbuf_clone(pbuf, bbuf2); } else { - return not_code_range_buf(enc, bbuf2, pbuf); + return not_code_range_buf(enc, bbuf2, pbuf, env); } } } @@ -1862,21 +1872,21 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, r = bbuf_clone(pbuf, bbuf2); } else if (not1 == 0) { /* 1 OR (not 2) */ - r = not_code_range_buf(enc, bbuf2, pbuf); + r = not_code_range_buf(enc, bbuf2, pbuf, env); } if (r != 0) return r; for (i = 0; i < n1; i++) { from = data1[i*2]; to = data1[i*2+1]; - r = add_code_range_to_buf(pbuf, from, to); + r = 
add_code_range_to_buf(pbuf, env, from, to); if (r != 0) return r; } return 0; } static int -and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, +and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1, OnigCodePoint* data, int n) { int i, r; @@ -1894,7 +1904,7 @@ and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, else if (from2 <= to1) { if (to2 < to1) { if (from1 <= from2 - 1) { - r = add_code_range_to_buf(pbuf, from1, from2-1); + r = add_code_range_to_buf(pbuf, env, from1, from2-1); if (r != 0) return r; } from1 = to2 + 1; @@ -1909,14 +1919,14 @@ and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, if (from1 > to1) break; } if (from1 <= to1) { - r = add_code_range_to_buf(pbuf, from1, to1); + r = add_code_range_to_buf(pbuf, env, from1, to1); if (r != 0) return r; } return 0; } static int -and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) +and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) { int r; OnigCodePoint i, j, n1, n2, *data1, *data2; @@ -1955,7 +1965,7 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) if (to2 < from1) continue; from = MAX(from1, from2); to = MIN(to1, to2); - r = add_code_range_to_buf(pbuf, from, to); + r = add_code_range_to_buf(pbuf, env, from, to); if (r != 0) return r; } } @@ -1964,7 +1974,7 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) for (i = 0; i < n1; i++) { from1 = data1[i*2]; to1 = data1[i*2+1]; - r = and_code_range1(pbuf, from1, to1, data2, n2); + r = and_code_range1(pbuf, env, from1, to1, data2, n2); if (r != 0) return r; } } @@ -1973,8 +1983,9 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) } static int -and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) { + OnigEncoding enc = env->enc; int r, not1, not2; BBuf *buf1, 
*buf2, *pbuf; BitSetRef bsr1, bsr2; @@ -2006,13 +2017,13 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) if (! ONIGENC_IS_SINGLEBYTE(enc)) { if (not1 != 0 && not2 != 0) { - r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); + r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env); } else { - r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); + r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env); if (r == 0 && not1 != 0) { BBuf *tbuf; - r = not_code_range_buf(enc, pbuf, &tbuf); + r = not_code_range_buf(enc, pbuf, &tbuf, env); if (r != 0) { bbuf_free(pbuf); return r; @@ -2031,8 +2042,9 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) } static int -or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) { + OnigEncoding enc = env->enc; int r, not1, not2; BBuf *buf1, *buf2, *pbuf; BitSetRef bsr1, bsr2; @@ -2064,13 +2076,13 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { if (not1 != 0 && not2 != 0) { - r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); + r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env); } else { - r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); + r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env); if (r == 0 && not1 != 0) { BBuf *tbuf; - r = not_code_range_buf(enc, pbuf, &tbuf); + r = not_code_range_buf(enc, pbuf, &tbuf, env); if (r != 0) { bbuf_free(pbuf); return r; @@ -2839,6 +2851,22 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) } } +static void +CC_DUP_WARN(ScanEnv *env) +{ + UChar buf[WARN_BUFSIZE]; + if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) && + !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { + (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"character class has duplicated range"); + (*onig_warn)((char* )buf); + } +} + static UChar* find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, UChar **next, OnigEncoding enc) @@ -3756,7 +3784,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) static int add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, - OnigEncoding enc ARG_UNUSED, + ScanEnv* env, OnigCodePoint sb_out, const OnigCodePoint mbr[]) { int i, r; @@ -3771,7 +3799,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, if (j >= sb_out) { if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++; else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), j, + r = add_code_range_to_buf(&(cc->mbuf), env, j, ONIGENC_CODE_RANGE_TO(mbr, i)); if (r != 0) return r; i++; @@ -3779,13 +3807,13 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, goto sb_end; } - BITSET_SET_BIT(cc->bs, j); + BITSET_SET_BIT_CHKDUP(cc->bs, j); } } sb_end: for ( ; i < n; i++) 
{ - r = add_code_range_to_buf(&(cc->mbuf), + r = add_code_range_to_buf(&(cc->mbuf), env, ONIGENC_CODE_RANGE_FROM(mbr, i), ONIGENC_CODE_RANGE_TO(mbr, i)); if (r != 0) return r; @@ -3800,12 +3828,12 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, if (j >= sb_out) { goto sb_end2; } - BITSET_SET_BIT(cc->bs, j); + BITSET_SET_BIT_CHKDUP(cc->bs, j); } prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; } for (j = prev; j < sb_out; j++) { - BITSET_SET_BIT(cc->bs, j); + BITSET_SET_BIT_CHKDUP(cc->bs, j); } sb_end2: @@ -3813,14 +3841,14 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, for (i = 0; i < n; i++) { if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), prev, + r = add_code_range_to_buf(&(cc->mbuf), env, prev, ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); if (r != 0) return r; } prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; } if (prev < 0x7fffffff) { - r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff); if (r != 0) return r; } } @@ -3838,7 +3866,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); + return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); } else if (r != ONIG_NO_SUPPORT_CONFIG) { return r; @@ -3860,14 +3888,14 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) if (not != 0) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { if (! 
ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); + BITSET_SET_BIT_CHKDUP(cc->bs, c); } ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); + BITSET_SET_BIT_CHKDUP(cc->bs, c); } } break; @@ -3877,13 +3905,13 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) if (not != 0) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); + BITSET_SET_BIT_CHKDUP(cc->bs, c); } } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); + BITSET_SET_BIT_CHKDUP(cc->bs, c); } ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } @@ -3892,7 +3920,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_WORD: if (not == 0) { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c); } ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } @@ -3900,7 +3928,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) for (c = 0; c < SINGLE_BYTE_SIZE; c++) { if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ && ! 
ONIGENC_IS_CODE_WORD(enc, c)) - BITSET_SET_BIT(cc->bs, c); + BITSET_SET_BIT_CHKDUP(cc->bs, c); } } break; @@ -4065,7 +4093,7 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, if (*state == CCS_VALUE && *type != CCV_CLASS) { if (*type == CCV_SB) - BITSET_SET_BIT(cc->bs, (int )(*vs)); + BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; @@ -4088,7 +4116,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, switch (*state) { case CCS_VALUE: if (*type == CCV_SB) - BITSET_SET_BIT(cc->bs, (int )(*vs)); + BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); else if (*type == CCV_CODE_POINT) { r = add_code_range(&(cc->mbuf), env, *vs, *vs); if (r < 0) return r; @@ -4107,7 +4135,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, else return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - bitset_set_range(cc->bs, (int )*vs, (int )v); + bitset_set_range(env, cc->bs, (int )*vs, (int )v); } else { r = add_code_range(&(cc->mbuf), env, *vs, v); @@ -4124,7 +4152,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, else return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); + bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? 
v : 0xff)); r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); if (r < 0) return r; #if 0 @@ -4407,7 +4435,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_char_class(&anode, tok, &p, end, env); if (r != 0) goto cc_open_err; acc = NCCLASS(anode); - r = or_cclass(cc, acc, env->enc); + r = or_cclass(cc, acc, env); onig_node_free(anode); cc_open_err: @@ -4427,7 +4455,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, state = CCS_START; if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env->enc); + r = and_cclass(prev_cc, cc, env); if (r != 0) goto err; bbuf_free(cc->mbuf); } @@ -4464,7 +4492,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env->enc); + r = and_cclass(prev_cc, cc, env); if (r != 0) goto err; bbuf_free(cc->mbuf); cc = prev_cc; @@ -4487,7 +4515,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) - BITSET_SET_BIT(cc->bs, NEWLINE_CODE); + BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE); else add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); } @@ -4983,7 +5011,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], add_code_range(&(cc->mbuf), env, *to, *to); } else { - BITSET_SET_BIT(bs, *to); + BITSET_SET_BIT_CHKDUP(bs, *to); } } #else @@ -4997,7 +5025,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], BITSET_CLEAR_BIT(bs, *to); } else - BITSET_SET_BIT(bs, *to); + BITSET_SET_BIT_CHKDUP(bs, *to); } } #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ |