utf8.h: Simplify UTF8_EIGHT_BIT_foo on EBCDIC

These macros were previously defined in terms of UTF8_TWO_BYTE_HI and UTF8_TWO_BYTE_LO. However, the EIGHT_BIT versions can use the simpler, less general NATIVE_TO_LATIN1 instead of NATIVE_TO_UNI, because their input domain is restricted to a single byte. Note that on ASCII platforms both forms expand to the same thing, so the difference matters only on EBCDIC.
author: Karl Williamson <public@khwilliamson.com> 2013-03-01 08:28:52 -0700
committer: Karl Williamson <public@khwilliamson.com> 2013-08-29 09:55:59 -0600
commit: fca3b69698c6f943aa6cd3c097b07f3eafa8113d (patch)
tree: 4775f46d08f95a392d2f1a0ba1e4dd3874a7822e /utf8.h
parent: d95f8b6ab696f65df80deff0ac32dd5be515428b (diff)
download: perl-fca3b69698c6f943aa6cd3c097b07f3eafa8113d.tar.gz
1 files changed, 8 insertions, 5 deletions
diff --git a/utf8.h b/utf8.h
index 1ecb3b82d5..3fb4fd2e3e 100644
--- a/utf8.h
+++ b/utf8.h
@@ -348,11 +348,14 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 #define UTF8_TWO_BYTE_HI(c)	((U8) (UTF8_TWO_BYTE_HI_nocast(c)))
 #define UTF8_TWO_BYTE_LO(c)	((U8) (UTF8_TWO_BYTE_LO_nocast(c)))
 
-/* This name is used when the source is a single byte.  For EBCDIC these could
- * be more efficiently written; the reason is that things above 0xFF have to be
- * special-cased, which is done by the EBCDIC version of NATIVE_TO_UNI() */
-#define UTF8_EIGHT_BIT_HI(c)	UTF8_TWO_BYTE_HI((U8)(c))
-#define UTF8_EIGHT_BIT_LO(c)	UTF8_TWO_BYTE_LO((U8)(c))
+/* This name is used when the source is a single byte (input not checked).
+ * These expand identically to the TWO_BYTE versions on ASCII platforms, but
+ * use to/from LATIN1 instead of UNI, which on EBCDIC eliminates tests */
+#define UTF8_EIGHT_BIT_HI(c)	I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c)          \
+                        >> UTF_ACCUMULATION_SHIFT) | (0xFF & UTF_START_MARK(2)))
+#define UTF8_EIGHT_BIT_LO(c)	I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c)          \
+                                                  & UTF_CONTINUATION_MASK)      \
+                                                | UTF_CONTINUATION_MARK)
 
 /* This is illegal in any well-formed UTF-8 in both EBCDIC and ASCII
  * as it is only in overlongs. */
author	Karl Williamson <public@khwilliamson.com>	2013-03-01 08:28:52 -0700
committer	Karl Williamson <public@khwilliamson.com>	2013-08-29 09:55:59 -0600
commit	fca3b69698c6f943aa6cd3c097b07f3eafa8113d (patch)
tree	4775f46d08f95a392d2f1a0ba1e4dd3874a7822e /utf8.h
parent	d95f8b6ab696f65df80deff0ac32dd5be515428b (diff)
download	perl-fca3b69698c6f943aa6cd3c097b07f3eafa8113d.tar.gz