diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-11-08 18:55:09 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-11-08 22:38:38 -0700 |
commit | afc16117342e69d725e9609816ad29f611edb5a5 (patch) | |
tree | 33fe89eea3edaaf70b06a7b01c47dc75d19d59d9 | |
parent | 50bda2c32d66573a5367b7d0d5a1d287d766b811 (diff) | |
download | perl-afc16117342e69d725e9609816ad29f611edb5a5.tar.gz |
utf8.c: Refactor to_uni_lower()
The portion of to_uni_lower() that deals with Latin1-range characters is
refactored into a separate static function, to_lower_latin1(), so that it can
be called from more than one place.
-rw-r--r-- | embed.fnc | 3 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | proto.h | 3 | ||||
-rw-r--r-- | utf8.c | 43 |
4 files changed, 34 insertions, 16 deletions
@@ -600,6 +600,9 @@ ApPR |bool |is_uni_punct |UV c ApPR |bool |is_uni_xdigit |UV c Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp +#ifdef PERL_IN_UTF8_C +sR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp +#endif Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags @@ -1574,6 +1574,7 @@ #define is_utf8_char_slow S_is_utf8_char_slow #define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c) #define swash_get(a,b,c) S_swash_get(aTHX_ a,b,c) +#define to_lower_latin1(a,b,c) S_to_lower_latin1(aTHX_ a,b,c) # endif # if defined(PERL_IN_UTIL_C) #define ckwarn_common(a) S_ckwarn_common(aTHX_ a) @@ -7003,6 +7003,9 @@ STATIC SV* S_swash_get(pTHX_ SV* swash, UV start, UV span) #define PERL_ARGS_ASSERT_SWASH_GET \ assert(swash) +STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp) + __attribute__warn_unused_result__; + #endif #if defined(PERL_IN_UTIL_C) STATIC bool S_ckwarn_common(pTHX_ U32 w); @@ -1357,29 +1357,40 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp) return to_utf8_title(p, p, lenp); } +STATIC U8 +S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp) +{ + /* We have the latin1-range values compiled into the core, so just use + * those, converting the result to utf8. 
Since the result is always just + * one character, we allow p to be NULL */ + + U8 converted = toLOWER_LATIN1(c); + + if (p != NULL) { + if (UNI_IS_INVARIANT(converted)) { + *p = converted; + *lenp = 1; + } + else { + *p = UTF8_TWO_BYTE_HI(converted); + *(p+1) = UTF8_TWO_BYTE_LO(converted); + *lenp = 2; + } + } + return converted; +} + UV Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp) { PERL_ARGS_ASSERT_TO_UNI_LOWER; - if (c > 255) { - uvchr_to_utf8(p, c); - return to_utf8_lower(p, p, lenp); + if (c < 256) { + return to_lower_latin1((U8) c, p, lenp); } - /* We have the latin1-range values compiled into the core, so just use - * those, converting the result to utf8 */ - c = toLOWER_LATIN1(c); - if (UNI_IS_INVARIANT(c)) { - *p = c; - *lenp = 1; - } - else { - *p = UTF8_TWO_BYTE_HI(c); - *(p+1) = UTF8_TWO_BYTE_LO(c); - *lenp = 2; - } - return c; + uvchr_to_utf8(p, c); + return to_utf8_lower(p, p, lenp); } UV |