diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-01-18 16:01:11 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-01-18 16:35:15 -0700 |
commit | 8e9da4d422fd1fb3711b88105d4e58f6b8f88877 (patch) | |
tree | 9d0dc85134dc1c7f4d6a4c7afefecd09f977efd7 /regexec.c | |
parent | b57e41186b2ceb48bef4f0588dcd19e105cc8a38 (diff) | |
download | perl-8e9da4d422fd1fb3711b88105d4e58f6b8f88877.tar.gz |
regexec.c: Fix /a complements
This showed up only on some systems in the current test suite, but processing
a complemented class (e.g., \D) has to take into account whether the target string is UTF-8.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 24 |
1 files changed, 21 insertions, 3 deletions
@@ -1689,6 +1689,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, isWORDCHAR((U8) *s) ); case ALNUMA: + /* Don't need to worry about utf8, as it can match only a single + * byte invariant character */ REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s)); case NALNUMU: REXEC_FBC_CSCAN_PRELOAD( @@ -1703,7 +1705,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, ! isALNUM(*s) ); case NALNUMA: - REXEC_FBC_UTF8_CLASS_SCAN( !isWORDCHAR_A(*s)); + REXEC_FBC_CSCAN( + !isWORDCHAR_A(*s), + !isWORDCHAR_A(*s) + ); + break; case NALNUML: REXEC_FBC_CSCAN_TAINT( !isALNUM_LC_utf8((U8*)s), @@ -1722,6 +1728,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, isSPACE((U8) *s) ); case SPACEA: + /* Don't need to worry about utf8, as it can match only a single + * byte invariant character */ REXEC_FBC_CLASS_SCAN( isSPACE_A(*s)); case SPACEL: REXEC_FBC_CSCAN_TAINT( @@ -1741,7 +1749,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, ! isSPACE((U8) *s) ); case NSPACEA: - REXEC_FBC_UTF8_CLASS_SCAN( !isSPACE_A(*s)); + REXEC_FBC_CSCAN( + !isSPACE_A(*s), + !isSPACE_A(*s) + ); + break; case NSPACEL: REXEC_FBC_CSCAN_TAINT( !isSPACE_LC_utf8((U8*)s), @@ -1754,6 +1766,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, isDIGIT(*s) ); case DIGITA: + /* Don't need to worry about utf8, as it can match only a single + * byte invariant character */ REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s)); case DIGITL: REXEC_FBC_CSCAN_TAINT( @@ -1767,7 +1781,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, !isDIGIT(*s) ); case NDIGITA: - REXEC_FBC_UTF8_CLASS_SCAN( !isDIGIT_A(*s)); + REXEC_FBC_CSCAN( + !isDIGIT_A(*s), + !isDIGIT_A(*s) + ); + break; case NDIGITL: REXEC_FBC_CSCAN_TAINT( !isDIGIT_LC_utf8((U8*)s), |