summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- regcomp.c 26
-rw-r--r-- regcomp.h 5
-rw-r--r-- regexec.c 32
3 files changed, 32 insertions, 31 deletions
diff --git a/regcomp.c b/regcomp.c
index 7d9e573913..327abde84f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3632,7 +3632,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (flags & SCF_DO_STCLASS_AND) {
if (!(data->start_class->flags & ANYOF_LOCALE)) {
ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
- if (FLAGS(scan) & USE_UNI) {
+ if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (!isWORDCHAR_L1(value)) {
ANYOF_BITMAP_CLEAR(data->start_class, value);
@@ -3650,7 +3650,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
- else if (FLAGS(scan) & USE_UNI) {
+ else if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (isWORDCHAR_L1(value)) {
ANYOF_BITMAP_SET(data->start_class, value);
@@ -3679,7 +3679,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (flags & SCF_DO_STCLASS_AND) {
if (!(data->start_class->flags & ANYOF_LOCALE)) {
ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
- if (FLAGS(scan) & USE_UNI) {
+ if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (isWORDCHAR_L1(value)) {
ANYOF_BITMAP_CLEAR(data->start_class, value);
@@ -3718,7 +3718,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (flags & SCF_DO_STCLASS_AND) {
if (!(data->start_class->flags & ANYOF_LOCALE)) {
ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
- if (FLAGS(scan) & USE_UNI) {
+ if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (!isSPACE_L1(value)) {
ANYOF_BITMAP_CLEAR(data->start_class, value);
@@ -3737,7 +3737,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (data->start_class->flags & ANYOF_LOCALE) {
ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
}
- else if (FLAGS(scan) & USE_UNI) {
+ else if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (isSPACE_L1(value)) {
ANYOF_BITMAP_SET(data->start_class, value);
@@ -3766,7 +3766,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (flags & SCF_DO_STCLASS_AND) {
if (!(data->start_class->flags & ANYOF_LOCALE)) {
ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
- if (FLAGS(scan) & USE_UNI) {
+ if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (isSPACE_L1(value)) {
ANYOF_BITMAP_CLEAR(data->start_class, value);
@@ -3784,7 +3784,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
- else if (FLAGS(scan) & USE_UNI) {
+ else if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
for (value = 0; value < 256; value++) {
if (!isSPACE_L1(value)) {
ANYOF_BITMAP_SET(data->start_class, value);
@@ -7400,8 +7400,8 @@ tryagain:
ret = reg_node(pRExC_state, (U8)(ALNUML));
} else {
ret = reg_node(pRExC_state, (U8)(ALNUM));
- FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
}
+ FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= HASWIDTH|SIMPLE;
goto finish_meta_pat;
case 'W':
@@ -7409,8 +7409,8 @@ tryagain:
ret = reg_node(pRExC_state, (U8)(NALNUML));
} else {
ret = reg_node(pRExC_state, (U8)(NALNUM));
- FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
}
+ FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= HASWIDTH|SIMPLE;
goto finish_meta_pat;
case 'b':
@@ -7420,8 +7420,8 @@ tryagain:
ret = reg_node(pRExC_state, (U8)(BOUNDL));
} else {
ret = reg_node(pRExC_state, (U8)(BOUND));
- FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
}
+ FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= SIMPLE;
goto finish_meta_pat;
case 'B':
@@ -7431,8 +7431,8 @@ tryagain:
ret = reg_node(pRExC_state, (U8)(NBOUNDL));
} else {
ret = reg_node(pRExC_state, (U8)(NBOUND));
- FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
}
+ FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= SIMPLE;
goto finish_meta_pat;
case 's':
@@ -7440,8 +7440,8 @@ tryagain:
ret = reg_node(pRExC_state, (U8)(SPACEL));
} else {
ret = reg_node(pRExC_state, (U8)(SPACE));
- FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
}
+ FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= HASWIDTH|SIMPLE;
goto finish_meta_pat;
case 'S':
@@ -7449,8 +7449,8 @@ tryagain:
ret = reg_node(pRExC_state, (U8)(NSPACEL));
} else {
ret = reg_node(pRExC_state, (U8)(NSPACE));
- FLAGS(ret) = (UNI_SEMANTICS) ? USE_UNI : 0;
}
+ FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= HASWIDTH|SIMPLE;
goto finish_meta_pat;
case 'd':
diff --git a/regcomp.h b/regcomp.h
index 96e7ae14f6..48e270f5cf 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -272,8 +272,9 @@ struct regnode_charclass_class {
#undef STRING
#define OP(p) ((p)->type)
-#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
- regnode types */
+#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
+ regnode types. For some, it's the \
+ character set of the regnode */
#define OPERAND(p) (((struct regnode_string *)p)->string)
#define MASK(p) ((char*)OPERAND(p))
#define STR_LEN(p) (((struct regnode_string *)p)->str_len)
diff --git a/regexec.c b/regexec.c
index be0feeb80c..3644302a74 100644
--- a/regexec.c
+++ b/regexec.c
@@ -219,7 +219,7 @@
* between 128 and 255 using Unicode (latin1) semantics. */
#define CCC_TRY_AFF_U(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU,LCFUNC) \
_CCC_TRY_AFF_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \
- if (!(OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) & USE_UNI))))) \
+ if (!(OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) == REGEX_UNICODE_CHARSET))))) \
sayNO; \
nextchr = UCHARAT(++locinput); \
break
@@ -261,7 +261,7 @@
#define CCC_TRY_NEG_U(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU,LCFUNC) \
_CCC_TRY_NEG_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU) \
- if ((OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) & USE_UNI))))) \
+ if ((OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) == REGEX_UNICODE_CHARSET))))) \
sayNO; \
nextchr = UCHARAT(++locinput); \
break
@@ -1557,13 +1557,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
tmp = cBOOL((OP(c) == BOUNDL)
? isALNUM_LC(tmp)
: (isWORDCHAR_L1(tmp)
- && (isASCII(tmp) || (FLAGS(c) & USE_UNI))));
+ && (isASCII(tmp) || (FLAGS(c) == REGEX_UNICODE_CHARSET))));
REXEC_FBC_SCAN(
if (tmp ==
!((OP(c) == BOUNDL)
? isALNUM_LC(*s)
: (isWORDCHAR_L1((U8) *s)
- && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI)))))
+ && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET)))))
{
tmp = !tmp;
REXEC_FBC_TRYIT;
@@ -1600,13 +1600,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
tmp = cBOOL((OP(c) == NBOUNDL)
? isALNUM_LC(tmp)
: (isWORDCHAR_L1(tmp)
- && (isASCII(tmp) || (FLAGS(c) & USE_UNI))));
+ && (isASCII(tmp) || (FLAGS(c) == REGEX_UNICODE_CHARSET))));
REXEC_FBC_SCAN(
if (tmp == ! cBOOL(
(OP(c) == NBOUNDL)
? isALNUM_LC(*s)
: (isWORDCHAR_L1((U8) *s)
- && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI)))))
+ && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET)))))
{
tmp = !tmp;
}
@@ -1620,7 +1620,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CSCAN_PRELOAD(
LOAD_UTF8_CHARCLASS_PERL_WORD(),
swash_fetch(RE_utf8_perl_word, (U8*)s, utf8_target),
- (FLAGS(c) & USE_UNI) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s)
+ (FLAGS(c) == REGEX_UNICODE_CHARSET) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s)
);
case ALNUML:
REXEC_FBC_CSCAN_TAINT(
@@ -1631,7 +1631,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CSCAN_PRELOAD(
LOAD_UTF8_CHARCLASS_PERL_WORD(),
!swash_fetch(RE_utf8_perl_word, (U8*)s, utf8_target),
- ! ((FLAGS(c) & USE_UNI) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s))
+ ! ((FLAGS(c) == REGEX_UNICODE_CHARSET) ? isWORDCHAR_L1((U8) *s) : isALNUM(*s))
);
case NALNUML:
REXEC_FBC_CSCAN_TAINT(
@@ -1642,7 +1642,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CSCAN_PRELOAD(
LOAD_UTF8_CHARCLASS_PERL_SPACE(),
*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target),
- isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI))
+ isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET))
);
case SPACEL:
REXEC_FBC_CSCAN_TAINT(
@@ -1653,7 +1653,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CSCAN_PRELOAD(
LOAD_UTF8_CHARCLASS_PERL_SPACE(),
!(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, utf8_target)),
- !(isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) & USE_UNI)))
+ !(isSPACE_L1((U8) *s) && (isASCII((U8) *s) || (FLAGS(c) == REGEX_UNICODE_CHARSET)))
);
case NSPACEL:
REXEC_FBC_CSCAN_TAINT(
@@ -3651,10 +3651,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
else {
ln = (locinput != PL_bostr) ?
UCHARAT(locinput - 1) : '\n';
- if (FLAGS(scan) & USE_UNI) {
+ if (FLAGS(scan) == REGEX_UNICODE_CHARSET) {
/* Here, can't be BOUNDL or NBOUNDL because they never set
- * the flags to USE_UNI */
+ * the flags to REGEX_UNICODE_CHARSET */
ln = isWORDCHAR_L1(ln);
n = isWORDCHAR_L1(nextchr);
}
@@ -5942,7 +5942,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
scan += UTF8SKIP(scan);
hardcount++;
}
- } else if (FLAGS(p) & USE_UNI) {
+ } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) {
while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
scan++;
}
@@ -5976,7 +5976,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
scan += UTF8SKIP(scan);
hardcount++;
}
- } else if (FLAGS(p) & USE_UNI) {
+ } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) {
while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
scan++;
}
@@ -6011,7 +6011,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
scan += UTF8SKIP(scan);
hardcount++;
}
- } else if (FLAGS(p) & USE_UNI) {
+ } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) {
while (scan < loceol && isSPACE_L1((U8) *scan)) {
scan++;
}
@@ -6045,7 +6045,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
scan += UTF8SKIP(scan);
hardcount++;
}
- } else if (FLAGS(p) & USE_UNI) {
+ } else if (FLAGS(p) == REGEX_UNICODE_CHARSET) {
while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
scan++;
}