summary | refs | log | tree | commit | diff
path: root/utf8_strings.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-09-02 09:44:22 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-09-13 21:14:00 -0600
commit 525b6419a6e9037dd46cd37b578c11266e7cd2b2 (patch)
tree a7cec91887ff1b88591f146dd70289e2176a2562 /utf8_strings.h
parent 5f1720e99d3e7fd0da4056940fd040fe824fd2ca (diff)
download perl-525b6419a6e9037dd46cd37b578c11266e7cd2b2.tar.gz
regen/utf8_strings.pl: Add ability to get native charset
This adds a new capability to this program: to input a Unicode code point and create a macro that expands to the platform's native value for it. This will allow removal of a bunch of EBCDIC dependencies in the core.
Diffstat (limited to 'utf8_strings.h')
-rw-r--r-- utf8_strings.h 25
1 files changed, 17 insertions, 8 deletions
diff --git a/utf8_strings.h b/utf8_strings.h
index 49a449a7b1..67a69c3b30 100644
--- a/utf8_strings.h
+++ b/utf8_strings.h
@@ -9,15 +9,16 @@
#define H_UTF8_STRINGS 1
/* This file contains #defines for various Unicode code points. The values
- * for the macros are all or portions of the UTF-8 encoding for the code
- * point. Note that the names all have the suffix "_UTF8".
+ * the macros expand to are the native Unicode code point, or all or portions
+ * of the UTF-8 encoding for the code point. In the former case, the macro
+ * name has the suffix "_NATIVE"; otherwise, the suffix "_UTF8".
*
- * The suffix "_FIRST_BYTE" may be appended to the name if the value is just
- * the first byte of the UTF-8 representation; the value will be a numeric
- * constant.
- *
- * The suffix "_TAIL" is appened if instead it represents all but the first
- * byte. This, and with no suffix are both string constants */
+ * The macros that have the suffix "_UTF8" may have further suffixes, as
+ * follows:
+ * "_FIRST_BYTE" if the value is just the first byte of the UTF-8
+ * representation; the value will be a numeric constant.
+ * "_TAIL" if instead it represents all but the first byte. This, and
+ * with no additional suffix are both string constants */
#define COMBINING_GRAVE_ACCENT_UTF8 "\xCC\x80" /* U+0300 */
#define COMBINING_ACUTE_ACCENT_UTF8 "\xCC\x81" /* U+0301 */
@@ -34,6 +35,14 @@
#define HANGUL_JONGSEONG_KIYEOK_UTF8 "\xE1\x86\xA8" /* U+11A8 */
#define HYPHEN_UTF8 "\xE2\x80\x90" /* U+2010 */
+#define DELETE_NATIVE 0x007F /* U+007F */
+#define LATIN_SMALL_LETTER_SHARP_S_NATIVE 0x00DF /* U+00DF */
+#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_NATIVE 0x00E5 /* U+00E5 */
+#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_NATIVE 0x00C5 /* U+00C5 */
+#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0x00FF /* U+00FF */
+#define MICRO_SIGN_NATIVE 0x00B5 /* U+00B5 */
+#define NEXT_LINE_NATIVE 0x0085 /* U+0085 */
+
#endif /* H_UTF8_STRINGS */
/* ex: set ro: */