some macro cleanup (smoke-me/khw-mac)

author: Karl Williamson <public@khwilliamson.com> 2012-03-05 20:56:52 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-03-06 07:11:46 -0700
commit: b9933c15464c85c20ff18d826f910218855216b0 (patch)
tree: 454bf10a8d754303fbebdbad0a07156417d64cd1
parent: fab2782b37b5570d7f8f8065fd7d18621117ed49 (diff)
download: perl-smoke-me/khw-mac.tar.gz
2 files changed, 11 insertions, 38 deletions
diff --git a/utf8.c b/utf8.c
index 2b1e99b23a..f7c28da5e3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -276,43 +276,15 @@ five bytes or more.
 STATIC STRLEN
 S_is_utf8_char_slow(const U8 *s, const STRLEN len)
 {
-    U8 u = *s;
-    STRLEN slen;
-    UV uv, ouv;
-
-    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
-
-    if (UTF8_IS_INVARIANT(u))
-	return len == 1;
+    dTHX;   /* The function called below requires thread context */
 
-    if (!UTF8_IS_START(u))
-	return 0;
-
-    if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
-	return 0;
+    STRLEN actual_len;
 
-    slen = len - 1;
-    s++;
-#ifdef EBCDIC
-    u = NATIVE_TO_UTF(u);
-#endif
-    u &= UTF_START_MASK(len);
-    uv  = u;
-    ouv = uv;
-    while (slen--) {
-	if (!UTF8_IS_CONTINUATION(*s))
-	    return 0;
-	uv = UTF8_ACCUMULATE(uv, *s);
-	if (uv < ouv)
-	    return 0;
-	ouv = uv;
-	s++;
-    }
+    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
 
-    if ((STRLEN)UNISKIP(uv) < len)
-	return 0;
+    utf8n_to_uvuni(s, len, &actual_len, UTF8_CHECK_ONLY);
 
-    return len;
+    return (actual_len == (STRLEN) -1) ? 0 : actual_len;
 }
 
 /*
@@ -608,7 +580,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
 	goto malformed;
     }
 
-    if (UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) &&
+    if (LAX_UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) &&
 	!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
 	warning = UTF8_WARN_NON_CONTINUATION;
 	goto malformed;
diff --git a/utf8.h b/utf8.h
index e558bb68d5..ecabb20c30 100644
--- a/utf8.h
+++ b/utf8.h
@@ -139,12 +139,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 */
 
 #define UNI_IS_INVARIANT(c)		(((UV)c) <  0x80)
-/* Note that C0 and C1 are invalid in legal UTF8, so the lower bound of the
- * below might ought to be C2 */
-#define UTF8_IS_START(c)		(((U8)c) >= 0xc0)
+#define UTF8_IS_START(c)		(((U8)c) >= 0xc2)
+#define LAX_UTF8_IS_START(c)		(((U8)c) >= 0xc0)   /* Allows overlong */
 #define UTF8_IS_CONTINUATION(c)		(((U8)c) >= 0x80 && (((U8)c) <= 0xbf))
 #define UTF8_IS_CONTINUED(c) 		(((U8)c) &  0x80)
-#define UTF8_IS_DOWNGRADEABLE_START(c)	(((U8)c & 0xfc) == 0xc0)
+
+/* Masking with 0xfe allows low bit to be 0 or 1; thus this matches 0xc[23] */
+#define UTF8_IS_DOWNGRADEABLE_START(c)	(((U8)c & 0xfe) == 0xc2)
 
 #define UTF_START_MARK(len) (((len) >  7) ? 0xFF : (0xFE << (7-(len))))
 #define UTF_START_MASK(len) (((len) >= 7) ? 0x00 : (0x1F >> ((len)-2)))
author	Karl Williamson <public@khwilliamson.com>	2012-03-05 20:56:52 -0700
committer	Karl Williamson <public@khwilliamson.com>	2012-03-06 07:11:46 -0700
commit	b9933c15464c85c20ff18d826f910218855216b0 (patch)
tree	454bf10a8d754303fbebdbad0a07156417d64cd1
parent	fab2782b37b5570d7f8f8065fd7d18621117ed49 (diff)
download	perl-smoke-me/khw-mac.tar.gz