diff options
author | Karl Williamson <khw@cpan.org> | 2015-05-15 10:59:54 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-09-04 10:21:17 -0600 |
commit | a62b247b9f3d5cc6214f83defea2e06d12398275 (patch) | |
tree | ec2bd2e98a8464e9160031756326688ac8c7b98a /regexec.c | |
parent | 635e76f560b3b3ca075aa2cb5d6d661601968e04 (diff) | |
download | perl-a62b247b9f3d5cc6214f83defea2e06d12398275.tar.gz |
Add macro for converting Latin1 to UTF-8, and use it
This adds a macro that converts a code point in the 128-255 range (the
non-ASCII, upper half of Latin1) to UTF-8, and changes existing code to
use it wherever the input is known to be restricted to that range. The
previous macro accepted a wider range (any code point representable by
2 bytes), but that generality required an extra test on EBCDIC
platforms, which made it larger than necessary and slightly slower.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 16 |
1 file changed, 8 insertions, 8 deletions
@@ -492,7 +492,7 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character) } else if (UTF8_IS_DOWNGRADEABLE_START(*character)) { return isFOO_lc(classnum, - TWO_BYTE_UTF8_TO_NATIVE(*character, *(character + 1))); + EIGHT_BIT_UTF8_TO_NATIVE(*character, *(character + 1))); } _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, character + UTF8SKIP(character)); @@ -2329,7 +2329,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, classnum))) || (UTF8_IS_DOWNGRADEABLE_START(*s) && to_complement ^ cBOOL( - _generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*s, + _generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1)), classnum)))) { @@ -5386,7 +5386,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) l++; } else { - if (TWO_BYTE_UTF8_TO_NATIVE(*l, *(l+1)) != * (U8*) s) + if (EIGHT_BIT_UTF8_TO_NATIVE(*l, *(l+1)) != * (U8*) s) { sayNO; } @@ -5410,7 +5410,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) s++; } else { - if (TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)) != * (U8*) l) + if (EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)) != * (U8*) l) { sayNO; } @@ -5783,7 +5783,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) } else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) { if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), - (U8) TWO_BYTE_UTF8_TO_NATIVE(nextchr, + (U8) EIGHT_BIT_UTF8_TO_NATIVE(nextchr, *(locinput + 1)))))) { sayNO; @@ -5864,7 +5864,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) } else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) { if (! 
(to_complement - ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(nextchr, + ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr, *(locinput + 1)), FLAGS(scan))))) { @@ -8141,7 +8141,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, /* Target isn't utf8; convert the character in the UTF-8 * pattern to non-UTF8, and do a simple loop */ - c = TWO_BYTE_UTF8_TO_NATIVE(c, *(STRING(p) + 1)); + c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1)); while (scan < loceol && UCHARAT(scan) == c) { scan++; } @@ -8385,7 +8385,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, } else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) { if (! (to_complement - ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*scan, + ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*scan, *(scan + 1)), classnum)))) { |