utf8.c: Refactor to_uni_lower()

The portion that deals with Latin1 range characters is refactored into a separate (static) function, so that it can be called from more than one place.
author: Karl Williamson <public@khwilliamson.com> 2011-11-08 18:55:09 -0700
committer: Karl Williamson <public@khwilliamson.com> 2011-11-08 22:38:38 -0700
commit: afc16117342e69d725e9609816ad29f611edb5a5 (patch)
tree: 33fe89eea3edaaf70b06a7b01c47dc75d19d59d9
parent: 50bda2c32d66573a5367b7d0d5a1d287d766b811 (diff)
download: perl-afc16117342e69d725e9609816ad29f611edb5a5.tar.gz
4 files changed, 34 insertions, 16 deletions
diff --git a/embed.fnc b/embed.fnc
index 251d475431..035f3db3fe 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -600,6 +600,9 @@ ApPR	|bool	|is_uni_punct	|UV c
 ApPR	|bool	|is_uni_xdigit	|UV c
 Ap	|UV	|to_uni_upper	|UV c|NN U8 *p|NN STRLEN *lenp
 Ap	|UV	|to_uni_title	|UV c|NN U8 *p|NN STRLEN *lenp
+#ifdef PERL_IN_UTF8_C
+sR	|U8	|to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp
+#endif
 Ap	|UV	|to_uni_lower	|UV c|NN U8 *p|NN STRLEN *lenp
 Amp	|UV	|to_uni_fold	|UV c|NN U8 *p|NN STRLEN *lenp
 AMp	|UV	|_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags
diff --git a/embed.h b/embed.h
index a47f513d05..2c9b827b0d 100644
--- a/embed.h
+++ b/embed.h
@@ -1574,6 +1574,7 @@
 #define is_utf8_char_slow	S_is_utf8_char_slow
 #define is_utf8_common(a,b,c)	S_is_utf8_common(aTHX_ a,b,c)
 #define swash_get(a,b,c)	S_swash_get(aTHX_ a,b,c)
+#define to_lower_latin1(a,b,c)	S_to_lower_latin1(aTHX_ a,b,c)
 #  endif
 #  if defined(PERL_IN_UTIL_C)
 #define ckwarn_common(a)	S_ckwarn_common(aTHX_ a)
diff --git a/proto.h b/proto.h
index 6aa9e2da58..2b58991d26 100644
--- a/proto.h
+++ b/proto.h
@@ -7003,6 +7003,9 @@ STATIC SV*	S_swash_get(pTHX_ SV* swash, UV start, UV span)
 #define PERL_ARGS_ASSERT_SWASH_GET	\
 	assert(swash)
 
+STATIC U8	S_to_lower_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp)
+			__attribute__warn_unused_result__;
+
 #endif
 #if defined(PERL_IN_UTIL_C)
 STATIC bool	S_ckwarn_common(pTHX_ U32 w);
diff --git a/utf8.c b/utf8.c
index 020e4711c9..919d1ccc85 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1357,29 +1357,40 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
     return to_utf8_title(p, p, lenp);
 }
 
+/* Lowercase Latin1-range c via the tables compiled into the core; the result
+ * is always one character.  If p is non-NULL its UTF-8 form (1 or 2 bytes) is
+ * stored there.  lenp is NULLOK in its own right, so it is tested as well. */
+STATIC U8
+S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp)
+{
+    const U8 converted = toLOWER_LATIN1(c);
+    if (p != NULL) {
+	const STRLEN len = UNI_IS_INVARIANT(converted) ? 1 : 2;
+	if (len == 1) {
+	    *p = converted;
+	}
+	else {
+	    *p = UTF8_TWO_BYTE_HI(converted);
+	    *(p+1) = UTF8_TWO_BYTE_LO(converted);
+	}
+	if (lenp != NULL) {	/* callers may want the bytes but not the count */
+	    *lenp = len;
+	}
+    }
+    return converted;
+}
+
 UV
 Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
-    if (c > 255) {
-	uvchr_to_utf8(p, c);
-	return to_utf8_lower(p, p, lenp);
+    if (c < 256) {
+	return to_lower_latin1((U8) c, p, lenp);
     }
 
-    /* We have the latin1-range values compiled into the core, so just use
-     * those, converting the result to utf8 */
-    c = toLOWER_LATIN1(c);
-    if (UNI_IS_INVARIANT(c)) {
-	*p = c;
-	*lenp = 1;
-    }
-    else {
-	*p = UTF8_TWO_BYTE_HI(c);
-	*(p+1) = UTF8_TWO_BYTE_LO(c);
-	*lenp = 2;
-    }
-    return c;
+    uvchr_to_utf8(p, c);
+    return to_utf8_lower(p, p, lenp);
 }
 
 UV
author	Karl Williamson <public@khwilliamson.com>	2011-11-08 18:55:09 -0700
committer	Karl Williamson <public@khwilliamson.com>	2011-11-08 22:38:38 -0700
commit	afc16117342e69d725e9609816ad29f611edb5a5 (patch)
tree	33fe89eea3edaaf70b06a7b01c47dc75d19d59d9
parent	50bda2c32d66573a5367b7d0d5a1d287d766b811 (diff)
download	perl-afc16117342e69d725e9609816ad29f611edb5a5.tar.gz