summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-05-15 10:59:54 -0600
committerKarl Williamson <khw@cpan.org>2015-09-04 10:21:17 -0600
commita62b247b9f3d5cc6214f83defea2e06d12398275 (patch)
treeec2bd2e98a8464e9160031756326688ac8c7b98a /utf8.c
parent635e76f560b3b3ca075aa2cb5d6d661601968e04 (diff)
downloadperl-a62b247b9f3d5cc6214f83defea2e06d12398275.tar.gz
Add macro for converting Latin1 to UTF-8, and use it
This adds a macro that converts a code point in the ASCII 128-255 range to UTF-8, and changes existing code to use it when the range is known to be restricted to this one, rather than the previous macro which accepted a wider range (any code point representable by 2 bytes), but had an extra test on EBCDIC platforms, hence was larger than necessary and slightly slower.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c24
1 files changed, 12 insertions, 12 deletions
diff --git a/utf8.c b/utf8.c
index 5d4a7cef5f..2a9d20e794 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1057,7 +1057,7 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
if (u < uend) {
U8 c1 = *u++;
if (UTF8_IS_CONTINUATION(c1)) {
- c = TWO_BYTE_UTF8_TO_NATIVE(c, c1);
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
} else {
Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
"Malformed UTF-8 character "
@@ -1133,7 +1133,7 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len)
U8 c = *s++;
if (! UTF8_IS_INVARIANT(c)) {
/* Then it is two-byte encoded */
- c = TWO_BYTE_UTF8_TO_NATIVE(c, *s);
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, *s);
s++;
}
*d++ = c;
@@ -1190,7 +1190,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8)
U8 c = *s++;
if (! UTF8_IS_INVARIANT(c)) {
/* Then it is two-byte encoded */
- c = TWO_BYTE_UTF8_TO_NATIVE(c, *s);
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, *s);
s++;
}
*d++ = c;
@@ -1971,11 +1971,11 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toUPPER_LC(c);
}
else {
- return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return _to_upper_title_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp, 'S');
}
}
@@ -2042,11 +2042,11 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toUPPER_LC(c);
}
else {
- return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return _to_upper_title_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp, 's');
}
}
@@ -2112,11 +2112,11 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toLOWER_LC(c);
}
else {
- return to_lower_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return to_lower_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp);
}
}
@@ -2194,11 +2194,11 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags & FOLD_FLAGS_LOCALE) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toFOLD_LC(c);
}
else {
- return _to_fold_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return _to_fold_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp,
flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
}
@@ -2723,7 +2723,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
else if (UTF8_IS_DOWNGRADEABLE_START(c)) {
klen = 0;
needents = 256;
- off = TWO_BYTE_UTF8_TO_NATIVE(c, *(ptr + 1));
+ off = EIGHT_BIT_UTF8_TO_NATIVE(c, *(ptr + 1));
}
else {
klen = UTF8SKIP(ptr) - 1;