summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-11-08 18:55:09 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-11-08 22:38:38 -0700
commit afc16117342e69d725e9609816ad29f611edb5a5 (patch)
tree 33fe89eea3edaaf70b06a7b01c47dc75d19d59d9
parent 50bda2c32d66573a5367b7d0d5a1d287d766b811 (diff)
download perl-afc16117342e69d725e9609816ad29f611edb5a5.tar.gz
utf8.c: Refactor to_uni_lower()
The portion that deals with Latin1 range characters is refactored into a separate (static) function, so that it can be called from more than one place.
-rw-r--r-- embed.fnc 3
-rw-r--r-- embed.h 1
-rw-r--r-- proto.h 3
-rw-r--r-- utf8.c 43
4 files changed, 34 insertions, 16 deletions
diff --git a/embed.fnc b/embed.fnc
index 251d475431..035f3db3fe 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -600,6 +600,9 @@ ApPR |bool |is_uni_punct |UV c
ApPR |bool |is_uni_xdigit |UV c
Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp
Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp
+#ifdef PERL_IN_UTF8_C
+sR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp
+#endif
Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp
Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp
AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags
diff --git a/embed.h b/embed.h
index a47f513d05..2c9b827b0d 100644
--- a/embed.h
+++ b/embed.h
@@ -1574,6 +1574,7 @@
#define is_utf8_char_slow S_is_utf8_char_slow
#define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c)
#define swash_get(a,b,c) S_swash_get(aTHX_ a,b,c)
+#define to_lower_latin1(a,b,c) S_to_lower_latin1(aTHX_ a,b,c)
# endif
# if defined(PERL_IN_UTIL_C)
#define ckwarn_common(a) S_ckwarn_common(aTHX_ a)
diff --git a/proto.h b/proto.h
index 6aa9e2da58..2b58991d26 100644
--- a/proto.h
+++ b/proto.h
@@ -7003,6 +7003,9 @@ STATIC SV* S_swash_get(pTHX_ SV* swash, UV start, UV span)
#define PERL_ARGS_ASSERT_SWASH_GET \
assert(swash)
+STATIC U8 S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp)
+ __attribute__warn_unused_result__;
+
#endif
#if defined(PERL_IN_UTIL_C)
STATIC bool S_ckwarn_common(pTHX_ U32 w);
diff --git a/utf8.c b/utf8.c
index 020e4711c9..919d1ccc85 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1357,29 +1357,40 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
return to_utf8_title(p, p, lenp);
}
+STATIC U8
+S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp)
+{
+ /* We have the latin1-range values compiled into the core, so just use
+ * those, converting the result to utf8. Since the result is always just
+ * one character, we allow p to be NULL */
+
+ U8 converted = toLOWER_LATIN1(c); /* mapped value, still a single byte */
+
+ if (p != NULL) { /* caller wants the utf8 form too; NOTE(review): lenp is written below with no NULL check of its own, so it must be non-NULL whenever p is */
+ if (UNI_IS_INVARIANT(converted)) { /* this branch stores the byte unchanged, length 1 */
+ *p = converted;
+ *lenp = 1;
+ }
+ else { /* otherwise store a two-byte sequence, length 2 */
+ *p = UTF8_TWO_BYTE_HI(converted);
+ *(p+1) = UTF8_TWO_BYTE_LO(converted);
+ *lenp = 2;
+ }
+ }
+ return converted; /* the mapped byte itself, not the bytes written to p */
+}
+
UV
Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TO_UNI_LOWER;
- if (c > 255) {
- uvchr_to_utf8(p, c);
- return to_utf8_lower(p, p, lenp);
+ if (c < 256) { /* value fits in a U8: hand off to the static Latin1 helper */
+ return to_lower_latin1((U8) c, p, lenp); /* helper's U8 result widens to the UV return type */
}
- /* We have the latin1-range values compiled into the core, so just use
- * those, converting the result to utf8 */
- c = toLOWER_LATIN1(c);
- if (UNI_IS_INVARIANT(c)) {
- *p = c;
- *lenp = 1;
- }
- else {
- *p = UTF8_TWO_BYTE_HI(c);
- *(p+1) = UTF8_TWO_BYTE_LO(c);
- *lenp = 2;
- }
- return c;
+ uvchr_to_utf8(p, c); /* c >= 256 here: write c into p before calling the general routine */
+ return to_utf8_lower(p, p, lenp); /* p is passed as both the first and second argument (presumably input and output buffer; confirm against to_utf8_lower) */
}
UV