diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-04-28 18:38:24 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-05-22 08:24:18 -0600 |
commit | 28936164408fd41cfaa353665e07fdb257254b20 (patch) | |
tree | 2f1507e368e61c34b7a8c75ab6639d4be7a5ea9d /utf8.h | |
parent | a4f7a67c079118a2f900d5f95d3cada67e3475ea (diff) | |
download | perl-28936164408fd41cfaa353665e07fdb257254b20.tar.gz |
utf8.h, pp.c: Add UTF8_IS_REPLACEMENT macro, and use it
This should speed things up slightly, as it looks directly at the UTF-8
source, instead of having to decode it first.
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 10 |
1 files changed, 10 insertions, 0 deletions
@@ -347,8 +347,18 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 # define UTF8_IS_SURROGATE(s) (*(s) == UTF_TO_NATIVE(0xF1) \
  && ((*((s) +1) == UTF_TO_NATIVE(0xB6)) \
  || *((s) + 1) == UTF_TO_NATIVE(0xB7)))
+ /* <send> points to one beyond the end of the string that starts at <s>; the first byte is read before the length test.  U+FFFD is F1 BF BF BD in I8 and EF BF BD in UTF-8 */
+# define UTF8_IS_REPLACEMENT(s, send) (*(s) == UTF_TO_NATIVE(0xF1) \
+ && ((send) - (s)) >= 4 \
+ && *((s) + 1) == UTF_TO_NATIVE(0xBF) \
+ && *((s) + 2) == UTF_TO_NATIVE(0xBF) \
+ && *((s) + 3) == UTF_TO_NATIVE(0xBD))
 #else
 # define UTF8_IS_SURROGATE(s) (*(s) == 0xED && *((s) + 1) >= 0xA0)
+# define UTF8_IS_REPLACEMENT(s, send) (*(s) == 0xEF \
+ && ((send) - (s)) >= 3 \
+ && *((s) + 1) == 0xBF \
+ && *((s) + 2) == 0xBD)
 #endif
 /* ASCII EBCDIC I8 |