diff options
-rw-r--r--  embed.fnc   3
-rw-r--r--  embed.h     1
-rw-r--r--  proto.h     3
-rw-r--r--  utf8.c     43
4 files changed, 34 insertions, 16 deletions
@@ -600,6 +600,9 @@ ApPR |bool |is_uni_punct |UV c
 ApPR |bool |is_uni_xdigit |UV c
 Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp
 Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp
+#ifdef PERL_IN_UTF8_C
+sR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp
+#endif
 Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp
 Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp
 AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags
@@ -1574,6 +1574,7 @@
 #define is_utf8_char_slow S_is_utf8_char_slow
 #define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c)
 #define swash_get(a,b,c) S_swash_get(aTHX_ a,b,c)
+#define to_lower_latin1(a,b,c) S_to_lower_latin1(aTHX_ a,b,c)
 # endif
 # if defined(PERL_IN_UTIL_C)
 #define ckwarn_common(a) S_ckwarn_common(aTHX_ a)
@@ -7003,6 +7003,9 @@ STATIC SV* S_swash_get(pTHX_ SV* swash, UV start, UV span)
 #define PERL_ARGS_ASSERT_SWASH_GET \
         assert(swash)
 
+STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp)
+        __attribute__warn_unused_result__;
+
 #endif
 #if defined(PERL_IN_UTIL_C)
 STATIC bool S_ckwarn_common(pTHX_ U32 w);
@@ -1357,29 +1357,40 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
     return to_utf8_title(p, p, lenp);
 }
 
+STATIC U8
+S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp)
+{
+    /* We have the latin1-range values compiled into the core, so just use
+     * those, converting the result to utf8. Since the result is always just
+     * one character, we allow p to be NULL */
+
+    U8 converted = toLOWER_LATIN1(c);
+
+    if (p != NULL) {
+        if (UNI_IS_INVARIANT(converted)) {
+            *p = converted;
+            *lenp = 1;
+        }
+        else {
+            *p = UTF8_TWO_BYTE_HI(converted);
+            *(p+1) = UTF8_TWO_BYTE_LO(converted);
+            *lenp = 2;
+        }
+    }
+    return converted;
+}
+
 UV
 Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
-    if (c > 255) {
-        uvchr_to_utf8(p, c);
-        return to_utf8_lower(p, p, lenp);
+    if (c < 256) {
+        return to_lower_latin1((U8) c, p, lenp);
     }
 
-    /* We have the latin1-range values compiled into the core, so just use
-     * those, converting the result to utf8 */
-    c = toLOWER_LATIN1(c);
-    if (UNI_IS_INVARIANT(c)) {
-        *p = c;
-        *lenp = 1;
-    }
-    else {
-        *p = UTF8_TWO_BYTE_HI(c);
-        *(p+1) = UTF8_TWO_BYTE_LO(c);
-        *lenp = 2;
-    }
-    return c;
+    uvchr_to_utf8(p, c);
+    return to_utf8_lower(p, p, lenp);
 }
 
 UV