diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-12-06 12:16:24 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2010-12-07 18:52:43 -0800 |
commit | 6bbba9040c7840209170b2ff9a1d7b03ae1cbdc1 (patch) | |
tree | 787e26318fb1eec1e83be40c6d370239f6b3b2f8 /regexec.c | |
parent | b77393f6288f64bf00f41fef15da0fac4085bfd2 (diff) | |
download | perl-6bbba9040c7840209170b2ff9a1d7b03ae1cbdc1.tar.gz |
regexec.c: Fix locale and \s
The handling for both locale \s and locale \S assumes that the character
at 0x20 on ASCII platforms is a space. This is not necessarily so.
I'm guessing that the code was originally just copied and pasted from
the non-locale space handling code without thinking. That code hard-coded
in the space character, probably to avoid an expensive swash fetch for a
common situation.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 8 |
1 files changed, 4 insertions, 4 deletions
@@ -1645,7 +1645,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         );
     case SPACEL:
         REXEC_FBC_CSCAN_TAINT(
-            *s == ' ' || isSPACE_LC_utf8((U8*)s),
+            isSPACE_LC_utf8((U8*)s),
             isSPACE_LC(*s)
         );
     case NSPACE:
@@ -1656,7 +1656,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         );
     case NSPACEL:
         REXEC_FBC_CSCAN_TAINT(
-            !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
+            !isSPACE_LC_utf8((U8*)s),
             !isSPACE_LC(*s)
         );
     case DIGIT:
@@ -6036,7 +6036,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         if (utf8_target) {
             loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
-                   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
+                   isSPACE_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
@@ -6071,7 +6071,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         if (utf8_target) {
             loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
-                   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
+                   !isSPACE_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }