From afc16117342e69d725e9609816ad29f611edb5a5 Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Tue, 8 Nov 2011 18:55:09 -0700
Subject: utf8.c: Refactor to_uni_lower()

The portion that deals with Latin1 range characters is refactored into a
separate (static) function, so that it can be called from more than one
place.
---
 embed.fnc |  3 +++
 embed.h   |  1 +
 proto.h   |  3 +++
 utf8.c    | 43 +++++++++++++++++++++++++++----------------
 4 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 251d475431..035f3db3fe 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -600,6 +600,9 @@ ApPR |bool |is_uni_punct |UV c
 ApPR |bool |is_uni_xdigit |UV c
 Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp
 Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp
+#ifdef PERL_IN_UTF8_C
+sR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp
+#endif
 Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp
 Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp
 AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags
diff --git a/embed.h b/embed.h
index a47f513d05..2c9b827b0d 100644
--- a/embed.h
+++ b/embed.h
@@ -1574,6 +1574,7 @@
 #define is_utf8_char_slow S_is_utf8_char_slow
 #define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c)
 #define swash_get(a,b,c) S_swash_get(aTHX_ a,b,c)
+#define to_lower_latin1(a,b,c) S_to_lower_latin1(aTHX_ a,b,c)
 # endif
 # if defined(PERL_IN_UTIL_C)
 #define ckwarn_common(a) S_ckwarn_common(aTHX_ a)
diff --git a/proto.h b/proto.h
index 6aa9e2da58..2b58991d26 100644
--- a/proto.h
+++ b/proto.h
@@ -7003,6 +7003,9 @@ STATIC SV* S_swash_get(pTHX_ SV* swash, UV start, UV span)
 #define PERL_ARGS_ASSERT_SWASH_GET \
     assert(swash)
 
+STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp)
+            __attribute__warn_unused_result__;
+
 #endif
 #if defined(PERL_IN_UTIL_C)
 STATIC bool S_ckwarn_common(pTHX_ U32 w);
diff --git a/utf8.c b/utf8.c
index 020e4711c9..919d1ccc85 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1357,29 +1357,40 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
     return to_utf8_title(p, p, lenp);
 }
 
+STATIC U8
+S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp)
+{
+    /* We have the latin1-range values compiled into the core, so just use
+     * those, converting the result to utf8. Since the result is always just
+     * one character, we allow p to be NULL */
+
+    U8 converted = toLOWER_LATIN1(c);
+
+    if (p != NULL) {
+        if (UNI_IS_INVARIANT(converted)) {
+            *p = converted;
+            *lenp = 1;
+        }
+        else {
+            *p = UTF8_TWO_BYTE_HI(converted);
+            *(p+1) = UTF8_TWO_BYTE_LO(converted);
+            *lenp = 2;
+        }
+    }
+    return converted;
+}
+
 UV
 Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
-    if (c > 255) {
-        uvchr_to_utf8(p, c);
-        return to_utf8_lower(p, p, lenp);
+    if (c < 256) {
+        return to_lower_latin1((U8) c, p, lenp);
     }
 
-    /* We have the latin1-range values compiled into the core, so just use
-     * those, converting the result to utf8 */
-    c = toLOWER_LATIN1(c);
-    if (UNI_IS_INVARIANT(c)) {
-        *p = c;
-        *lenp = 1;
-    }
-    else {
-        *p = UTF8_TWO_BYTE_HI(c);
-        *(p+1) = UTF8_TWO_BYTE_LO(c);
-        *lenp = 2;
-    }
-    return c;
+    uvchr_to_utf8(p, c);
+    return to_utf8_lower(p, p, lenp);
 }
 
 UV
--
cgit v1.2.1