diff options
-rw-r--r-- | regcomp.c | 26 | ||||
-rw-r--r-- | regcomp.h | 5 | ||||
-rw-r--r-- | regexec.c | 32 |
3 files changed, 32 insertions, 31 deletions
@@ -3632,7 +3632,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (flags & SCF_DO_STCLASS_AND) { if (!(data->start_class->flags & ANYOF_LOCALE)) { ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM); - if (FLAGS(scan) & USE_UNI) { + if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (!isWORDCHAR_L1(value)) { ANYOF_BITMAP_CLEAR(data->start_class, value); @@ -3650,7 +3650,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, else { if (data->start_class->flags & ANYOF_LOCALE) ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM); - else if (FLAGS(scan) & USE_UNI) { + else if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (isWORDCHAR_L1(value)) { ANYOF_BITMAP_SET(data->start_class, value); @@ -3679,7 +3679,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (flags & SCF_DO_STCLASS_AND) { if (!(data->start_class->flags & ANYOF_LOCALE)) { ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM); - if (FLAGS(scan) & USE_UNI) { + if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (isWORDCHAR_L1(value)) { ANYOF_BITMAP_CLEAR(data->start_class, value); @@ -3718,7 +3718,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (flags & SCF_DO_STCLASS_AND) { if (!(data->start_class->flags & ANYOF_LOCALE)) { ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE); - if (FLAGS(scan) & USE_UNI) { + if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (!isSPACE_L1(value)) { ANYOF_BITMAP_CLEAR(data->start_class, value); @@ -3737,7 +3737,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (data->start_class->flags & ANYOF_LOCALE) { ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE); } - else if (FLAGS(scan) & USE_UNI) { + else if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (isSPACE_L1(value)) { ANYOF_BITMAP_SET(data->start_class, value); @@ -3766,7 +3766,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (flags & SCF_DO_STCLASS_AND) { if (!(data->start_class->flags & ANYOF_LOCALE)) { ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE); - if (FLAGS(scan) & USE_UNI) { + if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (isSPACE_L1(value)) { ANYOF_BITMAP_CLEAR(data->start_class, value); @@ -3784,7 +3784,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, else { if (data->start_class->flags & ANYOF_LOCALE) ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE); - else if (FLAGS(scan) & USE_UNI) { + else if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { for (value = 0; value < 256; value++) { if (!isSPACE_L1(value)) { ANYOF_BITMAP_SET(data->start_class, value); @@ -7400,8 +7400,8 @@ tryagain: ret = reg_node(pRExC_state, (U8)(ALNUML)); } else { ret = reg_node(pRExC_state, (U8)(ALNUM)); - FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0; } + FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= HASWIDTH|SIMPLE; goto finish_meta_pat; case 'W': @@ -7409,8 +7409,8 @@ tryagain: ret = reg_node(pRExC_state, (U8)(NALNUML)); } else { ret = reg_node(pRExC_state, (U8)(NALNUM)); - FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0; } + FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= HASWIDTH|SIMPLE; goto finish_meta_pat; case 'b': @@ -7420,8 +7420,8 @@ tryagain: ret = reg_node(pRExC_state, (U8)(BOUNDL)); } else { ret = reg_node(pRExC_state, (U8)(BOUND)); - FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0; } + FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= SIMPLE; goto finish_meta_pat; case 'B': @@ -7431,8 +7431,8 @@ tryagain: ret = reg_node(pRExC_state, (U8)(NBOUNDL)); } else { ret = reg_node(pRExC_state, (U8)(NBOUND)); - FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0; } + FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= SIMPLE; goto finish_meta_pat; case 's': @@ -7440,8 +7440,8 @@ tryagain: ret = reg_node(pRExC_state, (U8)(SPACEL)); } else { ret = reg_node(pRExC_state, (U8)(SPACE)); - FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0; } + FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= HASWIDTH|SIMPLE; goto finish_meta_pat; case 'S': @@ -7449,8 +7449,8 @@ tryagain: ret = reg_node(pRExC_state, (U8)(NSPACEL)); } else { ret = reg_node(pRExC_state, (U8)(NSPACE)); - FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0; } + FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= HASWIDTH|SIMPLE; goto finish_meta_pat; case 'd': @@ -272,8 +272,9 @@ struct regnode_charclass_class { #undef STRING #define OP(p) ((p)->type) -#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \ - regnode types */ +#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \ + regnode types. For some, it's the \ + character set of the regnode */ #define OPERAND(p) (((struct regnode_string *)p)->string) #define MASK(p) ((char*)OPERAND(p)) #define STR_LEN(p) (((struct regnode_string *)p)->str_len) @@ -219,7 +219,7 @@ * between 128 and 255 using Unicode (latin1) semantics. */ #define CCC_TRY_AFF_U(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU,LCFUNC) \ _CCC_TRY_AFF_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \ - if (!(OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) & USE_UNI))))) \ + if (!(OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) == REGEX_UNICODE_CHARSET))))) \ sayNO; \ nextchr = UCHARAT(++locinput); \ break @@ -261,7 +261,7 @@ #define CCC_TRY_NEG_U(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU,LCFUNC) \ _CCC_TRY_NEG_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU) \ - if ((OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) & USE_UNI))))) \ + if ((OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) == REGEX_UNICODE_CHARSET))))) \ sayNO; \ nextchr = UCHARAT(++locinput); \ break @@ -1557,13 +1557,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, tmp = cBOOL((OP(c) == BOUNDL) ? isALNUM_LC(tmp) : (isWORDCHAR_L1(tmp) - && (isASCII(tmp) || (FLAGS(c) & USE_UNI)))); + && (isASCII(tmp) || (FLAGS(c) == REGEX_UNICODE_CHARSET)))); REXEC_FBC_SCAN( if (tmp == !((OP(c) == BOUNDL) ? isALNUM_LC(*s) : (isWORDCHAR_L1((U8) *s) - && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI))))) + && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET))))) { tmp = !tmp; REXEC_FBC_TRYIT; @@ -1600,13 +1600,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, tmp = cBOOL((OP(c) == NBOUNDL) ? isALNUM_LC(tmp) : (isWORDCHAR_L1(tmp) - && (isASCII(tmp) || (FLAGS(c) & USE_UNI)))); + && (isASCII(tmp) || (FLAGS(c) == REGEX_UNICODE_CHARSET)))); REXEC_FBC_SCAN( if (tmp == ! cBOOL( (OP(c) == NBOUNDL) ? isALNUM_LC(*s) : (isWORDCHAR_L1((U8) *s) - && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI))))) + && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET))))) { tmp = !tmp; } @@ -1620,7 +1620,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, REXEC_FBC_CSCAN_PRELOAD( LOAD_UTF8_CHARCLASS_PERL_WORD(), swash_fetch(RE_utf8_perl_word, (U8*)s, utf8_target), - (FLAGS(c) & USE_UNI) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s) + (FLAGS(c) == REGEX_UNICODE_CHARSET) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s) ); case ALNUML: REXEC_FBC_CSCAN_TAINT( @@ -1631,7 +1631,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, REXEC_FBC_CSCAN_PRELOAD( LOAD_UTF8_CHARCLASS_PERL_WORD(), !swash_fetch(RE_utf8_perl_word, (U8*)s, utf8_target), - ! ((FLAGS(c) & USE_UNI) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s)) + ! ((FLAGS(c) == REGEX_UNICODE_CHARSET) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s)) ); case NALNUML: REXEC_FBC_CSCAN_TAINT( @@ -1642,7 +1642,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, REXEC_FBC_CSCAN_PRELOAD( LOAD_UTF8_CHARCLASS_PERL_SPACE(), *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target), - isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI)) + isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET)) ); case SPACEL: REXEC_FBC_CSCAN_TAINT( @@ -1653,7 +1653,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, REXEC_FBC_CSCAN_PRELOAD( LOAD_UTF8_CHARCLASS_PERL_SPACE(), !(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target)), - !(isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI))) + !(isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET))) ); case NSPACEL: REXEC_FBC_CSCAN_TAINT( @@ -3651,10 +3651,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) else { ln = (locinput != PL_bostr) ? UCHARAT(locinput - 1) : '\n'; - if (FLAGS(scan) & USE_UNI) { + if (FLAGS(scan) == REGEX_UNICODE_CHARSET) { /* Here, can't be BOUNDL or NBOUNDL because they never set - * the flags to USE_UNI */ + * the flags to REGEX_UNICODE_CHARSET */ ln = isWORDCHAR_L1(ln); n = isWORDCHAR_L1(nextchr); } @@ -5942,7 +5942,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) scan += UTF8SKIP(scan); hardcount++; } - } else if (FLAGS(p) & USE_UNI) { + } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) { while (scan < loceol && isWORDCHAR_L1((U8) *scan)) { scan++; } @@ -5976,7 +5976,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) scan += UTF8SKIP(scan); hardcount++; } - } else if (FLAGS(p) & USE_UNI) { + } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) { while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) { scan++; } @@ -6011,7 +6011,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) scan += UTF8SKIP(scan); hardcount++; } - } else if (FLAGS(p) & USE_UNI) { + } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) { while (scan < loceol && isSPACE_L1((U8) *scan)) { scan++; } @@ -6045,7 +6045,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) scan += UTF8SKIP(scan); hardcount++; } - } else if (FLAGS(p) & USE_UNI) { + } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) { while (scan < loceol && ! isSPACE_L1((U8) *scan)) { scan++; } |