summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2011-01-18 16:01:11 -0700
committer: Karl Williamson <public@khwilliamson.com> 2011-01-18 16:35:15 -0700
commit: 8e9da4d422fd1fb3711b88105d4e58f6b8f88877 (patch)
tree: 9d0dc85134dc1c7f4d6a4c7afefecd09f977efd7 /regexec.c
parent: b57e41186b2ceb48bef4f0588dcd19e105cc8a38 (diff)
download: perl-8e9da4d422fd1fb3711b88105d4e58f6b8f88877.tar.gz
regexec.c: Fix /a complements
This showed up only on some systems in the current test suite, but processing a complemented class such as \D has to take into account whether the target string is UTF-8.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 24
1 files changed, 21 insertions, 3 deletions
diff --git a/regexec.c b/regexec.c
index 748e047211..3f38828616 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1689,6 +1689,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
isWORDCHAR((U8) *s)
);
case ALNUMA:
+ /* Don't need to worry about utf8, as it can match only a single
+ * byte invariant character */
REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
case NALNUMU:
REXEC_FBC_CSCAN_PRELOAD(
@@ -1703,7 +1705,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
! isALNUM(*s)
);
case NALNUMA:
- REXEC_FBC_UTF8_CLASS_SCAN( !isWORDCHAR_A(*s));
+ REXEC_FBC_CSCAN(
+ !isWORDCHAR_A(*s),
+ !isWORDCHAR_A(*s)
+ );
+ break;
case NALNUML:
REXEC_FBC_CSCAN_TAINT(
!isALNUM_LC_utf8((U8*)s),
@@ -1722,6 +1728,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
isSPACE((U8) *s)
);
case SPACEA:
+ /* Don't need to worry about utf8, as it can match only a single
+ * byte invariant character */
REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
case SPACEL:
REXEC_FBC_CSCAN_TAINT(
@@ -1741,7 +1749,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
! isSPACE((U8) *s)
);
case NSPACEA:
- REXEC_FBC_UTF8_CLASS_SCAN( !isSPACE_A(*s));
+ REXEC_FBC_CSCAN(
+ !isSPACE_A(*s),
+ !isSPACE_A(*s)
+ );
+ break;
case NSPACEL:
REXEC_FBC_CSCAN_TAINT(
!isSPACE_LC_utf8((U8*)s),
@@ -1754,6 +1766,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
isDIGIT(*s)
);
case DIGITA:
+ /* Don't need to worry about utf8, as it can match only a single
+ * byte invariant character */
REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
case DIGITL:
REXEC_FBC_CSCAN_TAINT(
@@ -1767,7 +1781,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
!isDIGIT(*s)
);
case NDIGITA:
- REXEC_FBC_UTF8_CLASS_SCAN( !isDIGIT_A(*s));
+ REXEC_FBC_CSCAN(
+ !isDIGIT_A(*s),
+ !isDIGIT_A(*s)
+ );
+ break;
case NDIGITL:
REXEC_FBC_CSCAN_TAINT(
!isDIGIT_LC_utf8((U8*)s),