summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <khw@khw-desktop.(none)> 2009-11-07 17:36:55 -0700
committer Rafael Garcia-Suarez <rgs@consttype.org> 2009-11-08 12:30:07 +0100
commit 5cd46e1f8d2d8792e0bfe33c11fc9ce2dee1c278 (patch)
tree 7532ca7dce573c3dd57c259390501e1c22e0f9de
parent 4decf0e1fe23a344509e4d89e9bda763cf2eeed1 (diff)
download perl-5cd46e1f8d2d8792e0bfe33c11fc9ce2dee1c278.tar.gz
Define specially handled chars; and clean-up ebcdic vs unicode
-rw-r--r-- utf8.h 25
-rw-r--r-- utfebcdic.h 15
2 files changed, 23 insertions, 17 deletions
diff --git a/utf8.h b/utf8.h
index 659319e82d..e70559e84c 100644
--- a/utf8.h
+++ b/utf8.h
@@ -112,7 +112,7 @@ encoded character.
#define UNI_IS_INVARIANT(c) (((UV)c) < 0x80)
#define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_UTF(c))
-#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_ASCII(c))
+#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE8_TO_UNI(c))
#define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
#define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf))
#define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80)
@@ -235,35 +235,28 @@ encoded character.
#define UTF8_IS_ASCII(c) UTF8_IS_INVARIANT(c)
-#define UNICODE_LATIN_SMALL_LETTER_SHARP_S 0x00DF
#define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3
#define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3
-#define EBCDIC_LATIN_SMALL_LETTER_SHARP_S 0x0059
-
#define UNI_DISPLAY_ISPRINT 0x0001
#define UNI_DISPLAY_BACKSLASH 0x0002
#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
#define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
-#ifdef EBCDIC
-# define ANYOF_FOLD_SHARP_S(node, input, end) \
- (ANYOF_BITMAP_TEST(node, EBCDIC_LATIN_SMALL_LETTER_SHARP_S) && \
- (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \
- (ANYOF_FLAGS(node) & ANYOF_FOLD) && \
- ((end) > (input) + 1) && \
- toLOWER((input)[0]) == 's' && \
- toLOWER((input)[1]) == 's')
-#else
-# define ANYOF_FOLD_SHARP_S(node, input, end) \
- (ANYOF_BITMAP_TEST(node, UNICODE_LATIN_SMALL_LETTER_SHARP_S) && \
+#ifndef EBCDIC
+# define LATIN_SMALL_LETTER_SHARP_S 0x00DF
+# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0x00FF
+# define MICRO_SIGN 0x00B5
+#endif
+
+#define ANYOF_FOLD_SHARP_S(node, input, end) \
+ (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
(ANYOF_FLAGS(node) & ANYOF_UNICODE) && \
(ANYOF_FLAGS(node) & ANYOF_FOLD) && \
((end) > (input) + 1) && \
toLOWER((input)[0]) == 's' && \
toLOWER((input)[1]) == 's')
-#endif
#define SHARP_S_SKIP 2
#ifdef EBCDIC
diff --git a/utfebcdic.h b/utfebcdic.h
index e61b4a7e9c..8a6176c85c 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -293,6 +293,10 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) *
0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
};
+#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
+#define LATIN_SMALL_LETTER_SHARP_S 0x59
+#define MICRO_SIGN 0xA0
+
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
@@ -333,6 +337,10 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
};
+#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
+#define LATIN_SMALL_LETTER_SHARP_S 0x59
+#define MICRO_SIGN 0xA0
+
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
@@ -373,6 +381,11 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
};
+
+#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
+#define LATIN_SMALL_LETTER_SHARP_S 0x59
+#define MICRO_SIGN 0xA0
+
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
@@ -409,7 +422,7 @@ END_EXTERN_C
/* Native to iso-8859-1 */
#define NATIVE_TO_ASCII(ch) PL_e2a[(U8)(ch)]
-#define NATIVE8_TO_UNI(ch) NATIVE_TO_ASCII(ch) /* synonym */
+#define NATIVE8_TO_UNI(ch) NATIVE_TO_ASCII(ch) /* a clearer synonym */
#define ASCII_TO_NATIVE(ch) PL_a2e[(U8)(ch)]
/* Transform after encoding */
#define NATIVE_TO_UTF(ch) PL_e2utf[(U8)(ch)]