summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorSara Golemon <pollita@php.net>2014-12-15 14:29:52 -0800
committerSara Golemon <sgolemon@fb.com>2015-01-16 14:37:44 -0800
commitebb60ac7dd179a3bea540d50a7d595010a82a656 (patch)
tree4a55cfedf9f3ad7fb156ab2025d8d224f7a14bc0 /ext
parent22bb3ce61a895f77178078da4a7e29cb39a532a7 (diff)
downloadphp-git-ebb60ac7dd179a3bea540d50a7d595010a82a656.tar.gz
Add IntlChar class to intl extension
Exposes ICU's uchar functionality as a set of static methods
Diffstat (limited to 'ext')
-rw-r--r--ext/intl/config.m42
-rw-r--r--ext/intl/config.w323
-rw-r--r--ext/intl/php_intl.c4
-rw-r--r--ext/intl/uchar/tests/basic-functionality.phpt153
-rw-r--r--ext/intl/uchar/ublockcode-enum.h266
-rw-r--r--ext/intl/uchar/uchar.c769
-rw-r--r--ext/intl/uchar/uchar.h8
-rw-r--r--ext/intl/uchar/ucharcategory-enum.h33
-rw-r--r--ext/intl/uchar/uchardirection-enum.h26
-rw-r--r--ext/intl/uchar/uother-enum.h284
-rw-r--r--ext/intl/uchar/uproperty-enum.h118
11 files changed, 1666 insertions, 0 deletions
diff --git a/ext/intl/config.m4 b/ext/intl/config.m4
index 0fbbd0f786..dd2827621e 100644
--- a/ext/intl/config.m4
+++ b/ext/intl/config.m4
@@ -84,6 +84,7 @@ if test "$PHP_INTL" != "no"; then
breakiterator/rulebasedbreakiterator_methods.cpp \
breakiterator/codepointiterator_internal.cpp \
breakiterator/codepointiterator_methods.cpp \
+ uchar/uchar.c \
idn/idn.c \
$icu_spoof_src, $ext_shared,,$ICU_INCS -Wno-write-strings -D__STDC_LIMIT_MACROS -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
PHP_ADD_BUILD_DIR($ext_builddir/collator)
@@ -102,4 +103,5 @@ if test "$PHP_INTL" != "no"; then
PHP_ADD_BUILD_DIR($ext_builddir/idn)
PHP_ADD_BUILD_DIR($ext_builddir/spoofchecker)
PHP_ADD_BUILD_DIR($ext_builddir/breakiterator)
+ PHP_ADD_BUILD_DIR($ext_builddir/uchar)
fi
diff --git a/ext/intl/config.w32 b/ext/intl/config.w32
index 4628e434f7..76bd19177b 100644
--- a/ext/intl/config.w32
+++ b/ext/intl/config.w32
@@ -72,6 +72,9 @@ if (PHP_INTL != "no") {
dateformat_helpers.cpp \
dateformat_create.cpp \
", "intl");
+ ADD_SOURCES(configure_module_dirname + "/uchar", "\
+ uchar.c",
+ "intl");
ADD_SOURCES(configure_module_dirname + "/idn", "\
idn.c",
"intl");
diff --git a/ext/intl/php_intl.c b/ext/intl/php_intl.c
index 3e45e48016..aef6dc15c7 100644
--- a/ext/intl/php_intl.c
+++ b/ext/intl/php_intl.c
@@ -85,6 +85,7 @@
#include "breakiterator/breakiterator_iterators.h"
#include "idn/idn.h"
+#include "uchar/uchar.h"
#if U_ICU_VERSION_MAJOR_NUM * 1000 + U_ICU_VERSION_MINOR_NUM >= 4002
# include "spoofchecker/spoofchecker_class.h"
@@ -1003,6 +1004,9 @@ PHP_MINIT_FUNCTION( intl )
/* 'Converter' class for codepage conversions */
php_converter_minit(INIT_FUNC_ARGS_PASSTHRU);
+ /* IntlChar class */
+ php_uchar_minit(INIT_FUNC_ARGS_PASSTHRU);
+
return SUCCESS;
}
/* }}} */
diff --git a/ext/intl/uchar/tests/basic-functionality.phpt b/ext/intl/uchar/tests/basic-functionality.phpt
new file mode 100644
index 0000000000..eb8e4fb261
--- /dev/null
+++ b/ext/intl/uchar/tests/basic-functionality.phpt
@@ -0,0 +1,153 @@
+--TEST--
+IntlChar basic functionality
+--FILE--
+<?php
+
+/**
+ * Print a report about one codepoint: its numeric value, the result of each
+ * listed binary property, the result of each listed IntlChar static method,
+ * and its charAge() joined with dots.
+ *
+ * $cp is handed to IntlChar unchanged, so it may be an integer codepoint or
+ * a UTF-8 string, as the callers below demonstrate.
+ */
+function unicode_info($cp) {
+    // Properties probed through IntlChar::hasBinaryProperty().
+    $proplist = [
+        IntlChar::PROPERTY_ALPHABETIC,
+    ];
+    // Static methods invoked by name with the codepoint as sole argument.
+    // The order (and the repeated entry) is mirrored by the EXPECT section.
+    $methodList = [
+        'isUAlphabetic',
+        'isUUppercase', 'isupper',
+        'isULowercase', 'islower',
+        'isUWhiteSpace', 'isWhitespace',
+        'istitle', 'isdigit', 'isalpha', 'isalnum',
+        'isxdigit', 'ispunct', 'ispunct', 'isgraph',
+        'isblank', 'isdefined', 'isspace', 'iscntrl',
+        'isMirrored', 'isIDStart', 'isIDPart',
+        'getBlockCode', 'charName',
+    ];
+
+    printf("Codepoint U+%04x\n", IntlChar::ord($cp));
+
+    foreach ($proplist as $property) {
+        $label = IntlChar::getPropertyName($property);
+        $state = IntlChar::hasBinaryProperty($cp, $property) ? "true" : "false";
+        printf(" hasBinaryProperty(%s): %s\n", $label, $state);
+    }
+    foreach ($methodList as $method) {
+        echo " $method(): ";
+        var_dump(IntlChar::$method($cp));
+    }
+    $age = implode('.', IntlChar::charAge($cp));
+    echo " charAge(): ", $age, "\n";
+    echo "\n";
+}
+
+// Show the supported codepoint range, then report on three samples given
+// as a UTF-8 string and as integer codepoints.
+printf("Codepoint range: %04x-%04x\n", IntlChar::CODEPOINT_MIN, IntlChar::CODEPOINT_MAX);
+foreach (['P', 0xDF, 0x2603] as $sample) {
+    unicode_info($sample);
+}
+
+// The second argument to enumCharNames() is an exclusive limit, hence
+// 0x2610 for a listing that ends at U+260F.
+echo "Sample range of codepoints: U+2600-U+260F\n";
+$printName = function($cp, $nc, $name) {
+    printf("U+%04x %s\n", $cp, $name);
+};
+IntlChar::enumCharNames(0x2600, 0x2610, $printName);
+
+// Reverse lookup: character name to codepoint.
+echo "RECYCLING SYMBOL FOR TYPE-1 PLASTICS => ";
+var_dump(IntlChar::charFromName("RECYCLING SYMBOL FOR TYPE-1 PLASTICS"));
+--EXPECT--
+Codepoint range: 0000-10ffff
+Codepoint U+0050
+ hasBinaryProperty(Alphabetic): true
+ isUAlphabetic(): bool(true)
+ isUUppercase(): bool(true)
+ isupper(): bool(true)
+ isULowercase(): bool(false)
+ islower(): bool(false)
+ isUWhiteSpace(): bool(false)
+ isWhitespace(): bool(false)
+ istitle(): bool(false)
+ isdigit(): bool(false)
+ isalpha(): bool(true)
+ isalnum(): bool(true)
+ isxdigit(): bool(false)
+ ispunct(): bool(false)
+ ispunct(): bool(false)
+ isgraph(): bool(true)
+ isblank(): bool(false)
+ isdefined(): bool(true)
+ isspace(): bool(false)
+ iscntrl(): bool(false)
+ isMirrored(): bool(false)
+ isIDStart(): bool(true)
+ isIDPart(): bool(true)
+ getBlockCode(): int(1)
+ charName(): string(22) "LATIN CAPITAL LETTER P"
+ charAge(): 1.1.0.0
+
+Codepoint U+00df
+ hasBinaryProperty(Alphabetic): true
+ isUAlphabetic(): bool(true)
+ isUUppercase(): bool(false)
+ isupper(): bool(false)
+ isULowercase(): bool(true)
+ islower(): bool(true)
+ isUWhiteSpace(): bool(false)
+ isWhitespace(): bool(false)
+ istitle(): bool(false)
+ isdigit(): bool(false)
+ isalpha(): bool(true)
+ isalnum(): bool(true)
+ isxdigit(): bool(false)
+ ispunct(): bool(false)
+ ispunct(): bool(false)
+ isgraph(): bool(true)
+ isblank(): bool(false)
+ isdefined(): bool(true)
+ isspace(): bool(false)
+ iscntrl(): bool(false)
+ isMirrored(): bool(false)
+ isIDStart(): bool(true)
+ isIDPart(): bool(true)
+ getBlockCode(): int(2)
+ charName(): string(26) "LATIN SMALL LETTER SHARP S"
+ charAge(): 1.1.0.0
+
+Codepoint U+2603
+ hasBinaryProperty(Alphabetic): false
+ isUAlphabetic(): bool(false)
+ isUUppercase(): bool(false)
+ isupper(): bool(false)
+ isULowercase(): bool(false)
+ islower(): bool(false)
+ isUWhiteSpace(): bool(false)
+ isWhitespace(): bool(false)
+ istitle(): bool(false)
+ isdigit(): bool(false)
+ isalpha(): bool(false)
+ isalnum(): bool(false)
+ isxdigit(): bool(false)
+ ispunct(): bool(false)
+ ispunct(): bool(false)
+ isgraph(): bool(true)
+ isblank(): bool(false)
+ isdefined(): bool(true)
+ isspace(): bool(false)
+ iscntrl(): bool(false)
+ isMirrored(): bool(false)
+ isIDStart(): bool(false)
+ isIDPart(): bool(false)
+ getBlockCode(): int(55)
+ charName(): string(7) "SNOWMAN"
+ charAge(): 1.1.0.0
+
+Sample range of codepoints: U+2600-U+260F
+U+2600 BLACK SUN WITH RAYS
+U+2601 CLOUD
+U+2602 UMBRELLA
+U+2603 SNOWMAN
+U+2604 COMET
+U+2605 BLACK STAR
+U+2606 WHITE STAR
+U+2607 LIGHTNING
+U+2608 THUNDERSTORM
+U+2609 SUN
+U+260a ASCENDING NODE
+U+260b DESCENDING NODE
+U+260c CONJUNCTION
+U+260d OPPOSITION
+U+260e BLACK TELEPHONE
+U+260f WHITE TELEPHONE
+RECYCLING SYMBOL FOR TYPE-1 PLASTICS => int(9843)
diff --git a/ext/intl/uchar/ublockcode-enum.h b/ext/intl/uchar/ublockcode-enum.h
new file mode 100644
index 0000000000..aaed7f12ed
--- /dev/null
+++ b/ext/intl/uchar/ublockcode-enum.h
@@ -0,0 +1,266 @@
+/* UBlockCode http://icu-project.org/apiref/icu4c/uchar_8h.html#a19f1662c9c33c31593612d05777ea901*/
+UBLOCKCODE(NO_BLOCK)
+UBLOCKCODE(BASIC_LATIN)
+UBLOCKCODE(LATIN_1_SUPPLEMENT)
+UBLOCKCODE(LATIN_EXTENDED_A)
+UBLOCKCODE(LATIN_EXTENDED_B)
+UBLOCKCODE(IPA_EXTENSIONS)
+UBLOCKCODE(SPACING_MODIFIER_LETTERS)
+UBLOCKCODE(COMBINING_DIACRITICAL_MARKS)
+UBLOCKCODE(GREEK)
+UBLOCKCODE(CYRILLIC)
+UBLOCKCODE(ARMENIAN)
+UBLOCKCODE(HEBREW)
+UBLOCKCODE(ARABIC)
+UBLOCKCODE(SYRIAC)
+UBLOCKCODE(THAANA)
+UBLOCKCODE(DEVANAGARI)
+UBLOCKCODE(BENGALI)
+UBLOCKCODE(GURMUKHI)
+UBLOCKCODE(GUJARATI)
+UBLOCKCODE(ORIYA)
+UBLOCKCODE(TAMIL)
+UBLOCKCODE(TELUGU)
+UBLOCKCODE(KANNADA)
+UBLOCKCODE(MALAYALAM)
+UBLOCKCODE(SINHALA)
+UBLOCKCODE(THAI)
+UBLOCKCODE(LAO)
+UBLOCKCODE(TIBETAN)
+UBLOCKCODE(MYANMAR)
+UBLOCKCODE(GEORGIAN)
+UBLOCKCODE(HANGUL_JAMO)
+UBLOCKCODE(ETHIOPIC)
+UBLOCKCODE(CHEROKEE)
+UBLOCKCODE(UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS)
+UBLOCKCODE(OGHAM)
+UBLOCKCODE(RUNIC)
+UBLOCKCODE(KHMER)
+UBLOCKCODE(MONGOLIAN)
+UBLOCKCODE(LATIN_EXTENDED_ADDITIONAL)
+UBLOCKCODE(GREEK_EXTENDED)
+UBLOCKCODE(GENERAL_PUNCTUATION)
+UBLOCKCODE(SUPERSCRIPTS_AND_SUBSCRIPTS)
+UBLOCKCODE(CURRENCY_SYMBOLS)
+UBLOCKCODE(COMBINING_MARKS_FOR_SYMBOLS)
+UBLOCKCODE(LETTERLIKE_SYMBOLS)
+UBLOCKCODE(NUMBER_FORMS)
+UBLOCKCODE(ARROWS)
+UBLOCKCODE(MATHEMATICAL_OPERATORS)
+UBLOCKCODE(MISCELLANEOUS_TECHNICAL)
+UBLOCKCODE(CONTROL_PICTURES)
+UBLOCKCODE(OPTICAL_CHARACTER_RECOGNITION)
+UBLOCKCODE(ENCLOSED_ALPHANUMERICS)
+UBLOCKCODE(BOX_DRAWING)
+UBLOCKCODE(BLOCK_ELEMENTS)
+UBLOCKCODE(GEOMETRIC_SHAPES)
+UBLOCKCODE(MISCELLANEOUS_SYMBOLS)
+UBLOCKCODE(DINGBATS)
+UBLOCKCODE(BRAILLE_PATTERNS)
+UBLOCKCODE(CJK_RADICALS_SUPPLEMENT)
+UBLOCKCODE(KANGXI_RADICALS)
+UBLOCKCODE(IDEOGRAPHIC_DESCRIPTION_CHARACTERS)
+UBLOCKCODE(CJK_SYMBOLS_AND_PUNCTUATION)
+UBLOCKCODE(HIRAGANA)
+UBLOCKCODE(KATAKANA)
+UBLOCKCODE(BOPOMOFO)
+UBLOCKCODE(HANGUL_COMPATIBILITY_JAMO)
+UBLOCKCODE(KANBUN)
+UBLOCKCODE(BOPOMOFO_EXTENDED)
+UBLOCKCODE(ENCLOSED_CJK_LETTERS_AND_MONTHS)
+UBLOCKCODE(CJK_COMPATIBILITY)
+UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A)
+UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS)
+UBLOCKCODE(YI_SYLLABLES)
+UBLOCKCODE(YI_RADICALS)
+UBLOCKCODE(HANGUL_SYLLABLES)
+UBLOCKCODE(HIGH_SURROGATES)
+UBLOCKCODE(HIGH_PRIVATE_USE_SURROGATES)
+UBLOCKCODE(LOW_SURROGATES)
+UBLOCKCODE(PRIVATE_USE_AREA)
+UBLOCKCODE(PRIVATE_USE)
+UBLOCKCODE(CJK_COMPATIBILITY_IDEOGRAPHS)
+UBLOCKCODE(ALPHABETIC_PRESENTATION_FORMS)
+UBLOCKCODE(ARABIC_PRESENTATION_FORMS_A)
+UBLOCKCODE(COMBINING_HALF_MARKS)
+UBLOCKCODE(CJK_COMPATIBILITY_FORMS)
+UBLOCKCODE(SMALL_FORM_VARIANTS)
+UBLOCKCODE(ARABIC_PRESENTATION_FORMS_B)
+UBLOCKCODE(SPECIALS)
+UBLOCKCODE(HALFWIDTH_AND_FULLWIDTH_FORMS)
+UBLOCKCODE(OLD_ITALIC)
+UBLOCKCODE(GOTHIC)
+UBLOCKCODE(DESERET)
+UBLOCKCODE(BYZANTINE_MUSICAL_SYMBOLS)
+UBLOCKCODE(MUSICAL_SYMBOLS)
+UBLOCKCODE(MATHEMATICAL_ALPHANUMERIC_SYMBOLS)
+UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B)
+UBLOCKCODE(CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT)
+UBLOCKCODE(TAGS)
+UBLOCKCODE(CYRILLIC_SUPPLEMENT)
+UBLOCKCODE(CYRILLIC_SUPPLEMENTARY)
+UBLOCKCODE(TAGALOG)
+UBLOCKCODE(HANUNOO)
+UBLOCKCODE(BUHID)
+UBLOCKCODE(TAGBANWA)
+UBLOCKCODE(MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A)
+UBLOCKCODE(SUPPLEMENTAL_ARROWS_A)
+UBLOCKCODE(SUPPLEMENTAL_ARROWS_B)
+UBLOCKCODE(MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B)
+UBLOCKCODE(SUPPLEMENTAL_MATHEMATICAL_OPERATORS)
+UBLOCKCODE(KATAKANA_PHONETIC_EXTENSIONS)
+UBLOCKCODE(VARIATION_SELECTORS)
+UBLOCKCODE(SUPPLEMENTARY_PRIVATE_USE_AREA_A)
+UBLOCKCODE(SUPPLEMENTARY_PRIVATE_USE_AREA_B)
+UBLOCKCODE(LIMBU)
+UBLOCKCODE(TAI_LE)
+UBLOCKCODE(KHMER_SYMBOLS)
+UBLOCKCODE(PHONETIC_EXTENSIONS)
+UBLOCKCODE(MISCELLANEOUS_SYMBOLS_AND_ARROWS)
+UBLOCKCODE(YIJING_HEXAGRAM_SYMBOLS)
+UBLOCKCODE(LINEAR_B_SYLLABARY)
+UBLOCKCODE(LINEAR_B_IDEOGRAMS)
+UBLOCKCODE(AEGEAN_NUMBERS)
+UBLOCKCODE(UGARITIC)
+UBLOCKCODE(SHAVIAN)
+UBLOCKCODE(OSMANYA)
+UBLOCKCODE(CYPRIOT_SYLLABARY)
+UBLOCKCODE(TAI_XUAN_JING_SYMBOLS)
+UBLOCKCODE(VARIATION_SELECTORS_SUPPLEMENT)
+UBLOCKCODE(ANCIENT_GREEK_MUSICAL_NOTATION)
+UBLOCKCODE(ANCIENT_GREEK_NUMBERS)
+UBLOCKCODE(ARABIC_SUPPLEMENT)
+UBLOCKCODE(BUGINESE)
+UBLOCKCODE(CJK_STROKES)
+UBLOCKCODE(COMBINING_DIACRITICAL_MARKS_SUPPLEMENT)
+UBLOCKCODE(COPTIC)
+UBLOCKCODE(ETHIOPIC_EXTENDED)
+UBLOCKCODE(ETHIOPIC_SUPPLEMENT)
+UBLOCKCODE(GEORGIAN_SUPPLEMENT)
+UBLOCKCODE(GLAGOLITIC)
+UBLOCKCODE(KHAROSHTHI)
+UBLOCKCODE(MODIFIER_TONE_LETTERS)
+UBLOCKCODE(NEW_TAI_LUE)
+UBLOCKCODE(OLD_PERSIAN)
+UBLOCKCODE(PHONETIC_EXTENSIONS_SUPPLEMENT)
+UBLOCKCODE(SUPPLEMENTAL_PUNCTUATION)
+UBLOCKCODE(SYLOTI_NAGRI)
+UBLOCKCODE(TIFINAGH)
+UBLOCKCODE(VERTICAL_FORMS)
+UBLOCKCODE(NKO)
+UBLOCKCODE(BALINESE)
+UBLOCKCODE(LATIN_EXTENDED_C)
+UBLOCKCODE(LATIN_EXTENDED_D)
+UBLOCKCODE(PHAGS_PA)
+UBLOCKCODE(PHOENICIAN)
+UBLOCKCODE(CUNEIFORM)
+UBLOCKCODE(CUNEIFORM_NUMBERS_AND_PUNCTUATION)
+UBLOCKCODE(COUNTING_ROD_NUMERALS)
+UBLOCKCODE(SUNDANESE)
+UBLOCKCODE(LEPCHA)
+UBLOCKCODE(OL_CHIKI)
+UBLOCKCODE(CYRILLIC_EXTENDED_A)
+UBLOCKCODE(VAI)
+UBLOCKCODE(CYRILLIC_EXTENDED_B)
+UBLOCKCODE(SAURASHTRA)
+UBLOCKCODE(KAYAH_LI)
+UBLOCKCODE(REJANG)
+UBLOCKCODE(CHAM)
+UBLOCKCODE(ANCIENT_SYMBOLS)
+UBLOCKCODE(PHAISTOS_DISC)
+UBLOCKCODE(LYCIAN)
+UBLOCKCODE(CARIAN)
+UBLOCKCODE(LYDIAN)
+UBLOCKCODE(MAHJONG_TILES)
+UBLOCKCODE(DOMINO_TILES)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44
+UBLOCKCODE(SAMARITAN)
+UBLOCKCODE(UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED)
+UBLOCKCODE(TAI_THAM)
+UBLOCKCODE(VEDIC_EXTENSIONS)
+UBLOCKCODE(LISU)
+UBLOCKCODE(BAMUM)
+UBLOCKCODE(COMMON_INDIC_NUMBER_FORMS)
+UBLOCKCODE(DEVANAGARI_EXTENDED)
+UBLOCKCODE(HANGUL_JAMO_EXTENDED_A)
+UBLOCKCODE(JAVANESE)
+UBLOCKCODE(MYANMAR_EXTENDED_A)
+UBLOCKCODE(TAI_VIET)
+UBLOCKCODE(MEETEI_MAYEK)
+UBLOCKCODE(HANGUL_JAMO_EXTENDED_B)
+UBLOCKCODE(IMPERIAL_ARAMAIC)
+UBLOCKCODE(OLD_SOUTH_ARABIAN)
+UBLOCKCODE(AVESTAN)
+UBLOCKCODE(INSCRIPTIONAL_PARTHIAN)
+UBLOCKCODE(INSCRIPTIONAL_PAHLAVI)
+UBLOCKCODE(OLD_TURKIC)
+UBLOCKCODE(RUMI_NUMERAL_SYMBOLS)
+UBLOCKCODE(KAITHI)
+UBLOCKCODE(EGYPTIAN_HIEROGLYPHS)
+UBLOCKCODE(ENCLOSED_ALPHANUMERIC_SUPPLEMENT)
+UBLOCKCODE(ENCLOSED_IDEOGRAPHIC_SUPPLEMENT)
+UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C)
+#endif /* ICU >= 4.4 */
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 46
+UBLOCKCODE(MANDAIC)
+UBLOCKCODE(BATAK)
+UBLOCKCODE(ETHIOPIC_EXTENDED_A)
+UBLOCKCODE(BRAHMI)
+UBLOCKCODE(BAMUM_SUPPLEMENT)
+UBLOCKCODE(KANA_SUPPLEMENT)
+UBLOCKCODE(PLAYING_CARDS)
+UBLOCKCODE(MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS)
+UBLOCKCODE(EMOTICONS)
+UBLOCKCODE(TRANSPORT_AND_MAP_SYMBOLS)
+UBLOCKCODE(ALCHEMICAL_SYMBOLS)
+UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D)
+#endif /* ICU >= 4.6 */
+#if U_ICU_VERSION_MAJOR_NUM >= 49
+UBLOCKCODE(ARABIC_EXTENDED_A)
+UBLOCKCODE(ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS)
+UBLOCKCODE(CHAKMA)
+UBLOCKCODE(MEETEI_MAYEK_EXTENSIONS)
+UBLOCKCODE(MEROITIC_CURSIVE)
+UBLOCKCODE(MEROITIC_HIEROGLYPHS)
+UBLOCKCODE(MIAO)
+UBLOCKCODE(SHARADA)
+UBLOCKCODE(SORA_SOMPENG)
+UBLOCKCODE(SUNDANESE_SUPPLEMENT)
+UBLOCKCODE(TAKRI)
+#endif /* ICU >= 49 */
+#if U_ICU_VERSION_MAJOR_NUM >= 54
+UBLOCKCODE(BASSA_VAH)
+UBLOCKCODE(CAUCASIAN_ALBANIAN)
+UBLOCKCODE(COPTIC_EPACT_NUMBERS)
+UBLOCKCODE(COMBINING_DIACRITICAL_MARKS_EXTENDED)
+UBLOCKCODE(DUPLOYAN)
+UBLOCKCODE(ELBASAN)
+UBLOCKCODE(GEOMETRIC_SHAPES_EXTENDED)
+UBLOCKCODE(GRANTHA)
+UBLOCKCODE(KHOJKI)
+UBLOCKCODE(KHUDAWADI)
+UBLOCKCODE(LATIN_EXTENDED_E)
+UBLOCKCODE(LINEAR_A)
+UBLOCKCODE(MAHAJANI)
+UBLOCKCODE(MANICHAEAN)
+UBLOCKCODE(MENDE_KIKAKUI)
+UBLOCKCODE(MODI)
+UBLOCKCODE(MRO)
+UBLOCKCODE(MYANMAR_EXTENDED_B)
+UBLOCKCODE(NABATAEAN)
+UBLOCKCODE(OLD_NORTH_ARABIAN)
+UBLOCKCODE(OLD_PERMIC)
+UBLOCKCODE(ORNAMENTAL_DINGBATS)
+UBLOCKCODE(PAHAWH_HMONG)
+UBLOCKCODE(PALMYRENE)
+UBLOCKCODE(PAU_CIN_HAU)
+UBLOCKCODE(PSALTER_PAHLAVI)
+UBLOCKCODE(SHORTHAND_FORMAT_CONTROLS)
+UBLOCKCODE(SIDDHAM)
+UBLOCKCODE(SINHALA_ARCHAIC_NUMBERS)
+UBLOCKCODE(SUPPLEMENTAL_ARROWS_C)
+UBLOCKCODE(TIRHUTA)
+UBLOCKCODE(WARANG_CITI)
+#endif /* ICU >= 54 */
+UBLOCKCODE(COUNT)
+UBLOCKCODE(INVALID_CODE)
diff --git a/ext/intl/uchar/uchar.c b/ext/intl/uchar/uchar.c
new file mode 100644
index 0000000000..b65417bc60
--- /dev/null
+++ b/ext/intl/uchar/uchar.c
@@ -0,0 +1,769 @@
+#include "uchar.h"
+#include "ext/intl/intl_data.h"
+#include "ext/intl/intl_convert.h"
+
+#include <unicode/uchar.h>
+
+#define IC_METHOD(mname) PHP_METHOD(IntlChar, mname)
+
+/* Resolve a PHP argument to a single Unicode codepoint.
+ *
+ * zcp may be an integer codepoint or a string holding exactly one UTF-8
+ * encoded codepoint. On success the value is stored in *pcp and SUCCESS is
+ * returned. On failure U_ILLEGAL_ARGUMENT_ERROR and a descriptive message
+ * are recorded in the global intl error and FAILURE is returned; *pcp is
+ * left untouched.
+ *
+ * Declared static: a plain C99 `inline` definition provides no external
+ * definition, so a non-inlined call could fail to link.
+ */
+static inline int convert_cp(UChar32* pcp, zval *zcp) {
+    zend_long cp = -1;
+
+    if (Z_TYPE_P(zcp) == IS_LONG) {
+        cp = Z_LVAL_P(zcp);
+    } else if (Z_TYPE_P(zcp) == IS_STRING) {
+        size_t len = Z_STRLEN_P(zcp);
+        int32_t i = 0;
+
+        /* One codepoint occupies 1..U8_MAX_LENGTH bytes. Only decode when the
+         * length is in that range, so the size_t length is never narrowed to
+         * a signed integer while holding a value that does not fit. */
+        if (len >= 1 && len <= U8_MAX_LENGTH) {
+            U8_NEXT(Z_STRVAL_P(zcp), i, (int32_t)len, cp);
+        }
+        /* i is still 0 when the length was rejected above; otherwise the
+         * decoder must have consumed the whole string. */
+        if (i == 0 || (size_t)i != len) {
+            intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+            intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
+            return FAILURE;
+        }
+    } else {
+        intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+        intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point. Must be either integer or UTF-8 sequence.", 0);
+        return FAILURE;
+    }
+
+    /* Also catches the negative value U8_NEXT yields for an ill-formed
+     * sequence, since cp is then below UCHAR_MIN_VALUE. */
+    if ((cp < UCHAR_MIN_VALUE) || (cp > UCHAR_MAX_VALUE)) {
+        intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+        intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
+        return FAILURE;
+    }
+    *pcp = (UChar32)cp;
+    return SUCCESS;
+}
+
+/* {{{ proto string IntlChar::chr(int|string $char)
+ * Converts a numeric codepoint to UTF-8
+ * Acts as an identity function when given a valid UTF-8 encoded codepoint
+ */
+ZEND_BEGIN_ARG_INFO_EX(chr_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO();
+/* Return the UTF-8 encoding of the codepoint given as int or UTF-8 string. */
+IC_METHOD(chr) {
+    zval *arg;
+    UChar32 codepoint;
+    /* Room for four UTF-8 bytes plus the terminating NUL. */
+    char utf8[5];
+    int utf8_len = 0;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    /* convert_cp() has already range-checked the codepoint, so the
+     * unchecked append stays within the four bytes reserved above.
+     */
+    U8_APPEND_UNSAFE(utf8, utf8_len, codepoint);
+    utf8[utf8_len] = 0;
+    RETURN_STRINGL(utf8, utf8_len);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::ord(int|string $codepoint)
+ * Converts a UTF-8 encoded codepoint to its integer U32 value
+ * Acts as an identity function when passed a valid integer codepoint
+ */
+ZEND_BEGIN_ARG_INFO_EX(ord_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, character)
+ZEND_END_ARG_INFO();
+/* Return the integer codepoint for an int or single-codepoint UTF-8 string. */
+IC_METHOD(ord) {
+    zval *arg;
+    UChar32 codepoint;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(codepoint);
+}
+/* }}} */
+
+/* {{{ proto bool IntlChar::hasBinaryProperty(int|string $char, int $property) */
+ZEND_BEGIN_ARG_INFO_EX(hasBinaryProperty_arginfo, 0, ZEND_RETURN_VALUE, 2)
+    ZEND_ARG_INFO(0, codepoint)
+    ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+/* Forward to ICU's u_hasBinaryProperty() and return the result as bool. */
+IC_METHOD(hasBinaryProperty) {
+    zval *arg;
+    zend_long property;
+    UChar32 codepoint;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &arg, &property) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    RETURN_BOOL(u_hasBinaryProperty(codepoint, (UProperty)property));
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getIntPropertyValue(int|string $char, int $property) */
+ZEND_BEGIN_ARG_INFO_EX(getIntPropertyValue_arginfo, 0, ZEND_RETURN_VALUE, 2)
+    ZEND_ARG_INFO(0, codepoint)
+    ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+/* Forward to ICU's u_getIntPropertyValue() and return the result as int. */
+IC_METHOD(getIntPropertyValue) {
+    zval *arg;
+    zend_long property;
+    UChar32 codepoint;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &arg, &property) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(u_getIntPropertyValue(codepoint, (UProperty)property));
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getIntPropertyMinValue(int $property) */
+ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMinValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+/* Forward to ICU's u_getIntPropertyMinValue() for the given property. */
+IC_METHOD(getIntPropertyMinValue) {
+    zend_long property;
+    int parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "l", &property);
+
+    if (parsed == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(u_getIntPropertyMinValue((UProperty)property));
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getIntPropertyMaxValue(int $property) */
+ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMaxValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+/* Forward to ICU's u_getIntPropertyMaxValue() for the given property. */
+IC_METHOD(getIntPropertyMaxValue) {
+    zend_long property;
+    int parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "l", &property);
+
+    if (parsed == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(u_getIntPropertyMaxValue((UProperty)property));
+}
+/* }}} */
+
+/* {{{ proto float IntlChar::getNumericValue(int|string $char) */
+ZEND_BEGIN_ARG_INFO_EX(getNumericValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO();
+/* Forward to ICU's u_getNumericValue() and return the result as float. */
+IC_METHOD(getNumericValue) {
+    zval *arg;
+    UChar32 codepoint;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    RETURN_DOUBLE(u_getNumericValue(codepoint));
+}
+/* }}} */
+
+/* {{{ proto void IntlChar::enumCharTypes(callable $cb) */
+/* The callback is mandatory (parsed with "f"), so one argument is required. */
+ZEND_BEGIN_ARG_INFO_EX(enumCharTypes_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, callback)
+ZEND_END_ARG_INFO();
+/* User callback handed through ICU's opaque context pointer. */
+typedef struct _enumCharType_data {
+    zend_fcall_info fci;
+    zend_fcall_info_cache fci_cache;
+} enumCharType_data;
+/* Bridge invoked by u_enumCharTypes() once per range; calls the PHP callback
+ * as callback(int $start, int $limit, int $type).
+ * Returns 1 after a successful call, or 0 after recording
+ * U_INTERNAL_PROGRAM_ERROR when the PHP call could not be made.
+ */
+static UBool enumCharType_callback(enumCharType_data *context,
+        UChar32 start, UChar32 limit,
+        UCharCategory type) {
+    zval retval;
+    zval args[3];
+    UBool ok = 1;
+
+    ZVAL_NULL(&retval);
+    /* Note that $start is INclusive, while $limit is EXclusive
+     * Therefore (0, 32, 15) means CPs 0..31 are of type 15
+     */
+    ZVAL_LONG(&args[0], start);
+    ZVAL_LONG(&args[1], limit);
+    ZVAL_LONG(&args[2], type);
+
+    context->fci.retval = &retval;
+    context->fci.param_count = 3;
+    context->fci.params = args;
+
+    if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
+        intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
+        /* Same global-error setter as the rest of this file. */
+        intl_error_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
+        ok = 0;
+    }
+    /* The callback's return value is not used; release it on both paths. */
+    zval_dtor(&retval);
+    return ok;
+}
+/* Parse the callable and let ICU enumerate character type ranges over it. */
+IC_METHOD(enumCharTypes) {
+    enumCharType_data context;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "f", &context.fci, &context.fci_cache) == FAILURE) {
+        return;
+    }
+    u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getBlockCode(int|string $char) */
+ZEND_BEGIN_ARG_INFO_EX(getBlockCode_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO()
+/* Forward to ICU's ublock_getCode() and return the block code as int. */
+IC_METHOD(getBlockCode) {
+    zval *arg;
+    UChar32 codepoint;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(ublock_getCode(codepoint));
+}
+/* }}} */
+
+/* {{{ proto string IntlChar::charName(int|string $char, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(charName_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+    ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO()
+/* Return the name of a codepoint as reported by ICU's u_charName(). */
+IC_METHOD(charName) {
+    zval *arg;
+    UChar32 codepoint;
+    zend_long nameChoice = U_UNICODE_CHAR_NAME;
+    UErrorCode status = U_ZERO_ERROR;
+    zend_string *name = NULL;
+    int32_t name_len;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &arg, &nameChoice) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    /* First pass with no buffer only asks ICU for the required length;
+     * whatever status that sets is discarded before the real call. */
+    name_len = u_charName(codepoint, (UCharNameChoice)nameChoice, NULL, 0, &status);
+    name = zend_string_alloc(name_len, 0);
+    status = U_ZERO_ERROR;
+    /* Capacity is len + 1 so ICU can write the terminating NUL as well. */
+    name_len = u_charName(codepoint, (UCharNameChoice)nameChoice, name->val, name->len + 1, &status);
+    if (U_FAILURE(status)) {
+        zend_string_free(name);
+        INTL_CHECK_STATUS(status, "Failure getting character name");
+    }
+    RETURN_NEW_STR(name);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::charFromName(string $name, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(charFromName_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, characterName)
+    ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO()
+/* Look up a codepoint by name via ICU's u_charFromName(). */
+IC_METHOD(charFromName) {
+    char *name;
+    size_t name_len;
+    zend_long nameChoice = U_UNICODE_CHAR_NAME;
+    UErrorCode status = U_ZERO_ERROR;
+    UChar32 codepoint;
+    int parsed;
+
+    parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &name, &name_len, &nameChoice);
+    if (parsed == FAILURE) {
+        return;
+    }
+
+    codepoint = u_charFromName((UCharNameChoice)nameChoice, name, &status);
+    /* Records the status and bails out with FALSE if the lookup failed. */
+    INTL_CHECK_STATUS(status, NULL);
+    RETURN_LONG(codepoint);
+}
+/* }}} */
+
+/* {{{ proto void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(enumCharNames_arginfo, 0, ZEND_RETURN_VALUE, 3)
+    ZEND_ARG_INFO(0, start)
+    ZEND_ARG_INFO(0, limit)
+    ZEND_ARG_INFO(0, callback)
+    ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO();
+/* User callback handed through ICU's opaque context pointer. */
+typedef struct _enumCharNames_data {
+    zend_fcall_info fci;
+    zend_fcall_info_cache fci_cache;
+} enumCharNames_data;
+/* Bridge invoked by u_enumCharNames() once per character; calls the PHP
+ * callback as callback(int $code, int $nameChoice, string $name).
+ * Returns 1 after a successful call, or 0 after recording
+ * U_INTERNAL_PROGRAM_ERROR when the PHP call could not be made.
+ */
+static UBool enumCharNames_callback(enumCharNames_data *context,
+        UChar32 code, UCharNameChoice nameChoice,
+        const char *name, int32_t length) {
+    zval result;
+    zval params[3];
+    UBool ok = 1;
+
+    ZVAL_NULL(&result);
+    ZVAL_LONG(&params[0], code);
+    ZVAL_LONG(&params[1], nameChoice);
+    ZVAL_STRINGL(&params[2], name, length);
+
+    context->fci.retval = &result;
+    context->fci.param_count = 3;
+    context->fci.params = params;
+
+    if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
+        intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
+        intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
+        ok = 0;
+    }
+
+    /* Release the unused return value and the name string built above. */
+    zval_dtor(&result);
+    zval_dtor(&params[2]);
+    return ok;
+}
+/* Parse the range and callable, then let ICU enumerate names over it. */
+IC_METHOD(enumCharNames) {
+    zval *zstart, *zlimit;
+    UChar32 start, limit;
+    enumCharNames_data context;
+    zend_long nameChoice = U_UNICODE_CHAR_NAME;
+    UErrorCode status = U_ZERO_ERROR;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzf|l", &zstart, &zlimit, &context.fci, &context.fci_cache, &nameChoice) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&start, zstart) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&limit, zlimit) == FAILURE) {
+        return;
+    }
+
+    u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &status);
+    INTL_CHECK_STATUS(status, NULL);
+}
+/* }}} */
+
+/* {{{ proto string IntlChar::getPropertyName(int $property, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(getPropertyName_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, property)
+    ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO();
+/* Return the name ICU reports for a property, or FALSE with
+ * U_ILLEGAL_ARGUMENT_ERROR recorded when ICU returns no name.
+ */
+IC_METHOD(getPropertyName) {
+    zend_long property;
+    zend_long nameChoice = U_LONG_PROPERTY_NAME;
+    const char *label;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &property, &nameChoice) == FAILURE) {
+        return;
+    }
+
+    label = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
+    if (!label) {
+        intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+        intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
+        RETURN_FALSE;
+    }
+    RETURN_STRING(label);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getPropertyEnum(string $alias) */
+ZEND_BEGIN_ARG_INFO_EX(getPropertyEnum_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, alias)
+ZEND_END_ARG_INFO();
+/* Forward a property alias to ICU's u_getPropertyEnum(). */
+IC_METHOD(getPropertyEnum) {
+    char *alias;
+    size_t alias_len;
+    int parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "s", &alias, &alias_len);
+
+    if (parsed == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(u_getPropertyEnum(alias));
+}
+/* }}} */
+
+/* {{{ proto string IntlChar::getPropertyValueName(int $prop, int $val[, int $nameChoice = IntlChar::LONG_PROPERTY_NAME]) */
+ZEND_BEGIN_ARG_INFO_EX(getPropertyValueName_arginfo, 0, ZEND_RETURN_VALUE, 2)
+    ZEND_ARG_INFO(0, property)
+    ZEND_ARG_INFO(0, value)
+    ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO();
+/* Return the name ICU reports for a property value, or FALSE with
+ * U_ILLEGAL_ARGUMENT_ERROR recorded when ICU returns no name.
+ */
+IC_METHOD(getPropertyValueName) {
+    zend_long property;
+    zend_long value;
+    zend_long nameChoice = U_LONG_PROPERTY_NAME;
+    const char *label;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll|l", &property, &value, &nameChoice) == FAILURE) {
+        return;
+    }
+
+    label = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice);
+    if (!label) {
+        intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+        intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
+        RETURN_FALSE;
+    }
+    RETURN_STRING(label);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getPropertyValueEnum(int $property, string $name) */
+ZEND_BEGIN_ARG_INFO_EX(getPropertyValueEnum_arginfo, 0, ZEND_RETURN_VALUE, 2)
+    ZEND_ARG_INFO(0, property)
+    ZEND_ARG_INFO(0, name)
+ZEND_END_ARG_INFO();
+/* Forward a property and value name to ICU's u_getPropertyValueEnum(). */
+IC_METHOD(getPropertyValueEnum) {
+    zend_long property;
+    char *value_name;
+    size_t value_name_len;
+    int parsed;
+
+    parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "ls", &property, &value_name, &value_name_len);
+    if (parsed == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(u_getPropertyValueEnum((UProperty)property, value_name));
+}
+/* }}} */
+
+/* {{{ proto int|string IntlChar::foldCase(int|string $char, int $options = IntlChar::FOLD_CASE_DEFAULT) */
+/* Describe both parameters the method actually parses ("z|l"): the
+ * codepoint and the optional folding options.
+ */
+ZEND_BEGIN_ARG_INFO_EX(foldCase_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+    ZEND_ARG_INFO(0, options)
+ZEND_END_ARG_INFO();
+/* Case-fold a codepoint with ICU's u_foldCase().
+ * The result mirrors the input form: a UTF-8 string when the codepoint was
+ * passed as a string, an integer otherwise.
+ */
+IC_METHOD(foldCase) {
+    UChar32 cp, ret;
+    zval *zcp;
+    zend_long options = U_FOLD_CASE_DEFAULT;
+
+    if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &options) == FAILURE) ||
+        (convert_cp(&cp, zcp) == FAILURE)) {
+        return;
+    }
+
+    ret = u_foldCase(cp, options);
+    if (Z_TYPE_P(zcp) == IS_STRING) {
+        /* Room for four UTF-8 bytes plus the terminating NUL. */
+        char buffer[5];
+        int buffer_len = 0;
+        U8_APPEND_UNSAFE(buffer, buffer_len, ret);
+        buffer[buffer_len] = 0;
+        RETURN_STRINGL(buffer, buffer_len);
+    } else {
+        RETURN_LONG(ret);
+    }
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::digit(int|string $char[, int $radix = 10]) */
+ZEND_BEGIN_ARG_INFO_EX(digit_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+    ZEND_ARG_INFO(0, radix)
+ZEND_END_ARG_INFO();
+/* Return the value ICU's u_digit() gives for the codepoint in the radix, or
+ * FALSE with U_ILLEGAL_ARGUMENT_ERROR recorded when that value is negative.
+ */
+IC_METHOD(digit) {
+    zval *arg;
+    UChar32 codepoint;
+    zend_long radix = 10;
+    int value;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &arg, &radix) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    value = u_digit(codepoint, radix);
+    if (value >= 0) {
+        RETURN_LONG(value);
+    }
+    intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+    intl_error_set_custom_msg(NULL, "Invalid digit", 0);
+    RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::forDigit(int $digit[, int $radix = 10]) */
+ZEND_BEGIN_ARG_INFO_EX(forDigit_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, digit)
+    ZEND_ARG_INFO(0, radix)
+ZEND_END_ARG_INFO();
+/* Return the result of ICU's u_forDigit() for a digit value in the radix.
+ * The radix is optional and defaults to 10.
+ */
+IC_METHOD(forDigit) {
+    zend_long digit, radix = 10;
+
+    /* "l|l": radix sits after the '|' so the default above is reachable,
+     * matching the arginfo, which requires only one argument. */
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &digit, &radix) == FAILURE) {
+        return;
+    }
+
+    RETURN_LONG(u_forDigit(digit, radix));
+}
+/* }}} */
+
+/* {{{ proto array IntlChar::charAge(int|string $char) */
+ZEND_BEGIN_ARG_INFO_EX(charAge_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO();
+/* Return the version filled in by ICU's u_charAge() as an array of
+ * U_MAX_VERSION_LENGTH integers.
+ */
+IC_METHOD(charAge) {
+    zval *arg;
+    UChar32 codepoint;
+    UVersionInfo age;
+    int field = 0;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    u_charAge(codepoint, age);
+    array_init(return_value);
+    while (field < U_MAX_VERSION_LENGTH) {
+        add_next_index_long(return_value, age[field]);
+        ++field;
+    }
+}
+/* }}} */
+
+/* {{{ proto array IntlChar::getUnicodeVersion() */
+ZEND_BEGIN_ARG_INFO_EX(getUnicodeVersion_arginfo, 0, ZEND_RETURN_VALUE, 0)
+ZEND_END_ARG_INFO();
+/* Return the version filled in by ICU's u_getUnicodeVersion() as an array
+ * of U_MAX_VERSION_LENGTH integers.
+ */
+IC_METHOD(getUnicodeVersion) {
+    UVersionInfo unicode_version;
+    int field = 0;
+
+    u_getUnicodeVersion(unicode_version);
+    array_init(return_value);
+    while (field < U_MAX_VERSION_LENGTH) {
+        add_next_index_long(return_value, unicode_version[field]);
+        ++field;
+    }
+}
+/* }}} */
+
+/* {{{ proto string IntlChar::getFC_NFKC_Closure(int|string $char) */
+ZEND_BEGIN_ARG_INFO_EX(getFC_NFKC_Closure_arginfo, 0, ZEND_RETURN_VALUE, 1)
+    ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO();
+/* Return the string produced by ICU's u_getFC_NFKC_Closure() for the
+ * codepoint, converted from UTF-16 to UTF-8. An empty string is returned
+ * when ICU reports a zero-length result.
+ */
+IC_METHOD(getFC_NFKC_Closure) {
+    zval *arg;
+    UChar32 codepoint;
+    UChar *utf16;
+    char *utf8;
+    int32_t utf16_len, utf8_len;
+    UErrorCode status = U_ZERO_ERROR;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) {
+        return;
+    }
+    if (convert_cp(&codepoint, arg) == FAILURE) {
+        return;
+    }
+
+    /* First pass with no buffer only asks ICU for the required length. */
+    utf16_len = u_getFC_NFKC_Closure(codepoint, NULL, 0, &status);
+    if (utf16_len == 0) {
+        RETURN_EMPTY_STRING();
+    }
+
+    utf16 = safe_emalloc(sizeof(UChar), utf16_len + 1, 0);
+    status = U_ZERO_ERROR;
+    utf16_len = u_getFC_NFKC_Closure(codepoint, utf16, utf16_len, &status);
+    if (U_FAILURE(status)) {
+        efree(utf16);
+        INTL_CHECK_STATUS(status, "Failed getting closure");
+    }
+
+    status = U_ZERO_ERROR;
+    intl_convert_utf16_to_utf8(&utf8, &utf8_len, utf16, utf16_len, &status);
+    /* The UTF-16 buffer is no longer needed whether or not conversion worked. */
+    efree(utf16);
+    INTL_CHECK_STATUS(status, "Failed converting output to UTF8");
+    RETVAL_STRINGL(utf8, utf8_len);
+    efree(utf8);
+}
+/* }}} */
+
+/* {{{ proto bool IntlChar::<name>(int|string $char)
+ * IC_BOOL_METHOD_CHAR(name) expands to the arginfo `name##_arginfo` (one
+ * required argument, "codepoint") and the method IntlChar::name, which
+ * converts its argument with convert_cp() and returns the result of ICU's
+ * u_<name>() as a bool. The method returns early, without setting a return
+ * value, when argument parsing or convert_cp() fails.
+ * The macro is undefined again after the list of instantiations below.
+ */
+#define IC_BOOL_METHOD_CHAR(name) \
+ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
+ ZEND_ARG_INFO(0, codepoint) \
+ZEND_END_ARG_INFO(); \
+IC_METHOD(name) { \
+ UChar32 cp; zval *zcp; \
+ if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
+ (convert_cp(&cp, zcp) == FAILURE)) { return; } \
+ RETURN_BOOL(u_##name(cp)); \
+}
+IC_BOOL_METHOD_CHAR(isUAlphabetic)
+IC_BOOL_METHOD_CHAR(isULowercase)
+IC_BOOL_METHOD_CHAR(isUUppercase)
+IC_BOOL_METHOD_CHAR(isUWhiteSpace)
+IC_BOOL_METHOD_CHAR(islower)
+IC_BOOL_METHOD_CHAR(isupper)
+IC_BOOL_METHOD_CHAR(istitle)
+IC_BOOL_METHOD_CHAR(isdigit)
+IC_BOOL_METHOD_CHAR(isalpha)
+IC_BOOL_METHOD_CHAR(isalnum)
+IC_BOOL_METHOD_CHAR(isxdigit)
+IC_BOOL_METHOD_CHAR(ispunct)
+IC_BOOL_METHOD_CHAR(isgraph)
+IC_BOOL_METHOD_CHAR(isblank)
+IC_BOOL_METHOD_CHAR(isdefined)
+IC_BOOL_METHOD_CHAR(isspace)
+IC_BOOL_METHOD_CHAR(isJavaSpaceChar)
+IC_BOOL_METHOD_CHAR(isWhitespace)
+IC_BOOL_METHOD_CHAR(iscntrl)
+IC_BOOL_METHOD_CHAR(isISOControl)
+IC_BOOL_METHOD_CHAR(isprint)
+IC_BOOL_METHOD_CHAR(isbase)
+IC_BOOL_METHOD_CHAR(isMirrored)
+IC_BOOL_METHOD_CHAR(isIDStart)
+IC_BOOL_METHOD_CHAR(isIDPart)
+IC_BOOL_METHOD_CHAR(isIDIgnorable)
+IC_BOOL_METHOD_CHAR(isJavaIDStart)
+IC_BOOL_METHOD_CHAR(isJavaIDPart)
+#undef IC_BOOL_METHOD_CHAR
+/* }}} */
+
+/* {{{ proto int IntlChar::<name>(int|string $char)
+ * Stamps out, for each listed name, an arginfo table plus a method that
+ * parses one argument, converts it with convert_cp() and returns the
+ * result of ICU's u_<name>() as an integer.
+ */
+#define IC_INT_METHOD_CHAR(name) \
+ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
+	ZEND_ARG_INFO(0, codepoint) \
+ZEND_END_ARG_INFO(); \
+IC_METHOD(name) { \
+	zval *arg; \
+	UChar32 c32; \
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) { \
+		return; \
+	} \
+	if (convert_cp(&c32, arg) == FAILURE) { \
+		return; \
+	} \
+	RETURN_LONG(u_##name(c32)); \
+}
+IC_INT_METHOD_CHAR(charDirection)
+IC_INT_METHOD_CHAR(charType)
+IC_INT_METHOD_CHAR(getCombiningClass)
+IC_INT_METHOD_CHAR(charDigitValue)
+#undef IC_INT_METHOD_CHAR
+/* }}} */
+
+/* {{{ proto int|string IntlChar::<name>(int|string $char)
+ * The result mirrors the argument's form: when the argument zval is a
+ * string the mapped code point is returned UTF-8 encoded, otherwise it is
+ * returned as an int.
+ */
+#define IC_CHAR_METHOD_CHAR(name) \
+ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
+	ZEND_ARG_INFO(0, codepoint) \
+ZEND_END_ARG_INFO(); \
+IC_METHOD(name) { \
+	zval *arg; \
+	UChar32 input, mapped; \
+	char utf8[5]; /* room for a 4-byte UTF-8 sequence and a NUL */ \
+	int utf8_len = 0; \
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg) == FAILURE) { \
+		return; \
+	} \
+	if (convert_cp(&input, arg) == FAILURE) { \
+		return; \
+	} \
+	mapped = u_##name(input); \
+	if (Z_TYPE_P(arg) != IS_STRING) { \
+		RETURN_LONG(mapped); \
+	} \
+	U8_APPEND_UNSAFE(utf8, utf8_len, mapped); \
+	utf8[utf8_len] = 0; \
+	RETURN_STRINGL(utf8, utf8_len); \
+}
+IC_CHAR_METHOD_CHAR(charMirror)
+IC_CHAR_METHOD_CHAR(tolower)
+IC_CHAR_METHOD_CHAR(toupper)
+IC_CHAR_METHOD_CHAR(totitle)
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+IC_CHAR_METHOD_CHAR(getBidiPairedBracket)
+#endif /* ICU >= 52 */
+#undef IC_CHAR_METHOD_CHAR
+/* }}} */
+
+/* Method table handed to INIT_CLASS_ENTRY() for the IntlChar class.
+ * Each IC_ME(x) entry registers IntlChar::x as a public static method and
+ * pairs it with the arginfo table named x_arginfo, so every method listed
+ * here needs both an IC_METHOD(x) body and an x_arginfo definition.
+ * The list is terminated by PHP_FE_END.
+ */
+static zend_function_entry intlchar_methods[] = {
+#define IC_ME(mname) PHP_ME(IntlChar, mname, mname##_arginfo, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
+ IC_ME(chr)
+ IC_ME(ord)
+ IC_ME(hasBinaryProperty)
+ IC_ME(isUAlphabetic)
+ IC_ME(isULowercase)
+ IC_ME(isUUppercase)
+ IC_ME(isUWhiteSpace)
+ IC_ME(getIntPropertyValue)
+ IC_ME(getIntPropertyMinValue)
+ IC_ME(getIntPropertyMaxValue)
+ IC_ME(getNumericValue)
+ IC_ME(islower)
+ IC_ME(isupper)
+ IC_ME(istitle)
+ IC_ME(isdigit)
+ IC_ME(isalpha)
+ IC_ME(isalnum)
+ IC_ME(isxdigit)
+ IC_ME(ispunct)
+ IC_ME(isgraph)
+ IC_ME(isblank)
+ IC_ME(isdefined)
+ IC_ME(isspace)
+ IC_ME(isJavaSpaceChar)
+ IC_ME(isWhitespace)
+ IC_ME(iscntrl)
+ IC_ME(isISOControl)
+ IC_ME(isprint)
+ IC_ME(isbase)
+ IC_ME(charDirection)
+ IC_ME(isMirrored)
+ IC_ME(charMirror)
+/* Same ICU version guard as the one around the method's definition */
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+ IC_ME(getBidiPairedBracket)
+#endif /* ICU >= 52 */
+ IC_ME(charType)
+ IC_ME(enumCharTypes)
+ IC_ME(getCombiningClass)
+ IC_ME(charDigitValue)
+ IC_ME(getBlockCode)
+ IC_ME(charName)
+ IC_ME(charFromName)
+ IC_ME(enumCharNames)
+ IC_ME(getPropertyName)
+ IC_ME(getPropertyEnum)
+ IC_ME(getPropertyValueName)
+ IC_ME(getPropertyValueEnum)
+ IC_ME(isIDStart)
+ IC_ME(isIDPart)
+ IC_ME(isIDIgnorable)
+ IC_ME(isJavaIDStart)
+ IC_ME(isJavaIDPart)
+ IC_ME(tolower)
+ IC_ME(toupper)
+ IC_ME(totitle)
+ IC_ME(foldCase)
+ IC_ME(digit)
+ IC_ME(forDigit)
+ IC_ME(charAge)
+ IC_ME(getUnicodeVersion)
+ IC_ME(getFC_NFKC_Closure)
+#undef IC_ME
+ PHP_FE_END
+};
+
+/* Registers the IntlChar class with its static methods (intlchar_methods)
+ * and declares all of its class constants.
+ * Always returns SUCCESS.
+ */
+int php_uchar_minit(INIT_FUNC_ARGS) {
+	zend_class_entry tmp, *ce;
+
+	INIT_CLASS_ENTRY(tmp, "IntlChar", intlchar_methods);
+	ce = zend_register_internal_class(&tmp);
+
+	/* name is always a string literal (possibly several adjacent literals
+	 * produced by "PREFIX_" #name), so sizeof gives its length at compile time. */
+#define IC_CONSTL(name, val) \
+	zend_declare_class_constant_long(ce, name, sizeof(name) - 1, val);
+
+	zend_declare_class_constant_string(ce, "UNICODE_VERSION", sizeof("UNICODE_VERSION")-1, U_UNICODE_VERSION);
+	IC_CONSTL("CODEPOINT_MIN", UCHAR_MIN_VALUE)
+	IC_CONSTL("CODEPOINT_MAX", UCHAR_MAX_VALUE)
+
+	/* All enums used by the uchar APIs. There are a LOT of them,
+	 * so they're separated out into include files,
+	 * leaving this source file for actual implementation.
+	 * Each header is a list of X(name) entries; the X macro defined just
+	 * before the #include decides the constant's prefix and the ICU symbol.
+	 */
+#define UPROPERTY(name) IC_CONSTL("PROPERTY_" #name, UCHAR_##name)
+#include "uproperty-enum.h"
+#undef UPROPERTY
+
+#define UCHARCATEGORY(name) IC_CONSTL("CHAR_CATEGORY_" #name, U_##name)
+#include "ucharcategory-enum.h"
+#undef UCHARCATEGORY
+
+#define UCHARDIRECTION(name) IC_CONSTL("CHAR_DIRECTION_" #name, U_##name)
+#include "uchardirection-enum.h"
+#undef UCHARDIRECTION
+
+#define UBLOCKCODE(name) IC_CONSTL("BLOCK_CODE_" #name, UBLOCK_##name)
+#include "ublockcode-enum.h"
+#undef UBLOCKCODE
+
+	/* Smaller, self-describing enums */
+#define UOTHER(name) IC_CONSTL(#name, U_##name)
+#include "uother-enum.h"
+#undef UOTHER
+
+#undef IC_CONSTL
+
+	return SUCCESS;
+}
+
+
diff --git a/ext/intl/uchar/uchar.h b/ext/intl/uchar/uchar.h
new file mode 100644
index 0000000000..25bab1f215
--- /dev/null
+++ b/ext/intl/uchar/uchar.h
@@ -0,0 +1,8 @@
+#ifndef incl_PHP_INTL_UCHAR_H
+#define incl_PHP_INTL_UCHAR_H
+
+#include "php.h"
+
+/* Registers the IntlChar class, its static methods and its class constants.
+ * Implemented in uchar.c, where it always returns SUCCESS.
+ * NOTE(review): presumably invoked from the intl extension's module
+ * startup in php_intl.c - confirm against the caller. */
+int php_uchar_minit(INIT_FUNC_ARGS);
+
+#endif /* incl_PHP_INTL_UCHAR_H */
diff --git a/ext/intl/uchar/ucharcategory-enum.h b/ext/intl/uchar/ucharcategory-enum.h
new file mode 100644
index 0000000000..2db8a2f860
--- /dev/null
+++ b/ext/intl/uchar/ucharcategory-enum.h
@@ -0,0 +1,33 @@
+/* Generated from http://icu-project.org/apiref/icu4c/uchar_8h.html#a6a2dbc531efce8d77fdb4c314e7fc25e
+ *
+ * X-macro list: php_uchar_minit() in uchar.c includes this file with
+ * UCHARCATEGORY(name) defined so that each entry declares the class constant
+ * IntlChar::CHAR_CATEGORY_<name> with the value of ICU's U_<name>.
+ * The prefix is applied to every entry, so the last one becomes
+ * IntlChar::CHAR_CATEGORY_CHAR_CATEGORY_COUNT.
+ */
+UCHARCATEGORY(UNASSIGNED)
+UCHARCATEGORY(GENERAL_OTHER_TYPES)
+UCHARCATEGORY(UPPERCASE_LETTER)
+UCHARCATEGORY(LOWERCASE_LETTER)
+UCHARCATEGORY(TITLECASE_LETTER)
+UCHARCATEGORY(MODIFIER_LETTER)
+UCHARCATEGORY(OTHER_LETTER)
+UCHARCATEGORY(NON_SPACING_MARK)
+UCHARCATEGORY(ENCLOSING_MARK)
+UCHARCATEGORY(COMBINING_SPACING_MARK)
+UCHARCATEGORY(DECIMAL_DIGIT_NUMBER)
+UCHARCATEGORY(LETTER_NUMBER)
+UCHARCATEGORY(OTHER_NUMBER)
+UCHARCATEGORY(SPACE_SEPARATOR)
+UCHARCATEGORY(LINE_SEPARATOR)
+UCHARCATEGORY(PARAGRAPH_SEPARATOR)
+UCHARCATEGORY(CONTROL_CHAR)
+UCHARCATEGORY(FORMAT_CHAR)
+UCHARCATEGORY(PRIVATE_USE_CHAR)
+UCHARCATEGORY(SURROGATE)
+UCHARCATEGORY(DASH_PUNCTUATION)
+UCHARCATEGORY(START_PUNCTUATION)
+UCHARCATEGORY(END_PUNCTUATION)
+UCHARCATEGORY(CONNECTOR_PUNCTUATION)
+UCHARCATEGORY(OTHER_PUNCTUATION)
+UCHARCATEGORY(MATH_SYMBOL)
+UCHARCATEGORY(CURRENCY_SYMBOL)
+UCHARCATEGORY(MODIFIER_SYMBOL)
+UCHARCATEGORY(OTHER_SYMBOL)
+UCHARCATEGORY(INITIAL_PUNCTUATION)
+UCHARCATEGORY(FINAL_PUNCTUATION)
+UCHARCATEGORY(CHAR_CATEGORY_COUNT)
diff --git a/ext/intl/uchar/uchardirection-enum.h b/ext/intl/uchar/uchardirection-enum.h
new file mode 100644
index 0000000000..afbbdb28cb
--- /dev/null
+++ b/ext/intl/uchar/uchardirection-enum.h
@@ -0,0 +1,26 @@
+/* X-macro list: php_uchar_minit() in uchar.c includes this file with
+ * UCHARDIRECTION(name) defined so that each entry declares the class constant
+ * IntlChar::CHAR_DIRECTION_<name> with the value of ICU's U_<name>.
+ * The prefix is applied to every entry, so the last one becomes
+ * IntlChar::CHAR_DIRECTION_CHAR_DIRECTION_COUNT.
+ */
+UCHARDIRECTION(LEFT_TO_RIGHT)
+UCHARDIRECTION(RIGHT_TO_LEFT)
+UCHARDIRECTION(EUROPEAN_NUMBER)
+UCHARDIRECTION(EUROPEAN_NUMBER_SEPARATOR)
+UCHARDIRECTION(EUROPEAN_NUMBER_TERMINATOR)
+UCHARDIRECTION(ARABIC_NUMBER)
+UCHARDIRECTION(COMMON_NUMBER_SEPARATOR)
+UCHARDIRECTION(BLOCK_SEPARATOR)
+UCHARDIRECTION(SEGMENT_SEPARATOR)
+UCHARDIRECTION(WHITE_SPACE_NEUTRAL)
+UCHARDIRECTION(OTHER_NEUTRAL)
+UCHARDIRECTION(LEFT_TO_RIGHT_EMBEDDING)
+UCHARDIRECTION(LEFT_TO_RIGHT_OVERRIDE)
+UCHARDIRECTION(RIGHT_TO_LEFT_ARABIC)
+UCHARDIRECTION(RIGHT_TO_LEFT_EMBEDDING)
+UCHARDIRECTION(RIGHT_TO_LEFT_OVERRIDE)
+UCHARDIRECTION(POP_DIRECTIONAL_FORMAT)
+UCHARDIRECTION(DIR_NON_SPACING_MARK)
+UCHARDIRECTION(BOUNDARY_NEUTRAL)
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+UCHARDIRECTION(FIRST_STRONG_ISOLATE)
+UCHARDIRECTION(LEFT_TO_RIGHT_ISOLATE)
+UCHARDIRECTION(RIGHT_TO_LEFT_ISOLATE)
+UCHARDIRECTION(POP_DIRECTIONAL_ISOLATE)
+#endif /* ICU >= 52 */
+UCHARDIRECTION(CHAR_DIRECTION_COUNT)
diff --git a/ext/intl/uchar/uother-enum.h b/ext/intl/uchar/uother-enum.h
new file mode 100644
index 0000000000..babcf5cb16
--- /dev/null
+++ b/ext/intl/uchar/uother-enum.h
@@ -0,0 +1,284 @@
+/* X-macro list: php_uchar_minit() in uchar.c includes this file with
+ * UOTHER(name) defined so that each entry declares the class constant
+ * IntlChar::<name> (no extra prefix) with the value of ICU's U_<name>.
+ * Entries inside #if blocks are only declared when building against an
+ * ICU version matching the guard; 4.x releases are tested as
+ * major * 10 + minor.
+ */
+
+/* UBidiPairedBracketType - http://icu-project.org/apiref/icu4c/uchar_8h.html#af954219aa1df452657ec355221c6703d */
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+UOTHER(BPT_NONE)
+UOTHER(BPT_OPEN)
+UOTHER(BPT_CLOSE)
+UOTHER(BPT_COUNT)
+#endif /* ICU >= 52 */
+
+/* UEastAsianWidth - http://icu-project.org/apiref/icu4c/uchar_8h.html#a95cc2ca2f9cfd6d0c63eee2c65951333 */
+UOTHER(EA_NEUTRAL)
+UOTHER(EA_AMBIGUOUS)
+UOTHER(EA_HALFWIDTH)
+UOTHER(EA_FULLWIDTH )
+UOTHER(EA_NARROW)
+UOTHER(EA_WIDE)
+UOTHER(EA_COUNT)
+
+/* UCharNameChoice - http://icu-project.org/apiref/icu4c/uchar_8h.html#a2ba37edcca62eff48226e8096035addf */
+UOTHER(UNICODE_CHAR_NAME)
+UOTHER(UNICODE_10_CHAR_NAME)
+UOTHER(EXTENDED_CHAR_NAME)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44
+UOTHER(CHAR_NAME_ALIAS)
+#endif /* ICU >= 4.4 */
+UOTHER(CHAR_NAME_CHOICE_COUNT)
+
+/* UPropertyNameChoice - http://icu-project.org/apiref/icu4c/uchar_8h.html#a5056494c7d5a2c7185f3c464f48fe5d1 */
+UOTHER(SHORT_PROPERTY_NAME)
+UOTHER(LONG_PROPERTY_NAME)
+UOTHER(PROPERTY_NAME_CHOICE_COUNT)
+
+/* UDecompositionType - http://icu-project.org/apiref/icu4c/uchar_8h.html#ae2c56994fcf28062c7e77beb671533f5 */
+UOTHER(DT_NONE)
+UOTHER(DT_CANONICAL)
+UOTHER(DT_COMPAT)
+UOTHER(DT_CIRCLE)
+UOTHER(DT_FINAL)
+UOTHER(DT_FONT)
+UOTHER(DT_FRACTION)
+UOTHER(DT_INITIAL)
+UOTHER(DT_ISOLATED)
+UOTHER(DT_MEDIAL)
+UOTHER(DT_NARROW)
+UOTHER(DT_NOBREAK)
+UOTHER(DT_SMALL)
+UOTHER(DT_SQUARE)
+UOTHER(DT_SUB)
+UOTHER(DT_SUPER)
+UOTHER(DT_VERTICAL)
+UOTHER(DT_WIDE)
+UOTHER(DT_COUNT )
+
+/* UJoiningType - http://icu-project.org/apiref/icu4c/uchar_8h.html#a3ce1ce20e7f3b8534eb3490ad3aba3dd */
+UOTHER(JT_NON_JOINING)
+UOTHER(JT_JOIN_CAUSING)
+UOTHER(JT_DUAL_JOINING)
+UOTHER(JT_LEFT_JOINING)
+UOTHER(JT_RIGHT_JOINING)
+UOTHER(JT_TRANSPARENT)
+UOTHER(JT_COUNT)
+
+/* UJoiningGroup - http://icu-project.org/apiref/icu4c/uchar_8h.html#a7887844ec0872e6e9a283e0825fcae65 */
+UOTHER(JG_NO_JOINING_GROUP)
+UOTHER(JG_AIN)
+UOTHER(JG_ALAPH)
+UOTHER(JG_ALEF)
+UOTHER(JG_BEH)
+UOTHER(JG_BETH)
+UOTHER(JG_DAL)
+UOTHER(JG_DALATH_RISH)
+UOTHER(JG_E)
+UOTHER(JG_FEH)
+UOTHER(JG_FINAL_SEMKATH)
+UOTHER(JG_GAF)
+UOTHER(JG_GAMAL)
+UOTHER(JG_HAH)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 46
+UOTHER(JG_TEH_MARBUTA_GOAL)
+#endif /* ICU >= 4.6 */
+UOTHER(JG_HAMZA_ON_HEH_GOAL)
+UOTHER(JG_HE)
+UOTHER(JG_HEH)
+UOTHER(JG_HEH_GOAL)
+UOTHER(JG_HETH)
+UOTHER(JG_KAF)
+UOTHER(JG_KAPH)
+UOTHER(JG_KNOTTED_HEH)
+UOTHER(JG_LAM)
+UOTHER(JG_LAMADH)
+UOTHER(JG_MEEM)
+UOTHER(JG_MIM)
+UOTHER(JG_NOON)
+UOTHER(JG_NUN)
+UOTHER(JG_PE)
+UOTHER(JG_QAF)
+UOTHER(JG_QAPH)
+UOTHER(JG_REH)
+UOTHER(JG_REVERSED_PE)
+UOTHER(JG_SAD)
+UOTHER(JG_SADHE)
+UOTHER(JG_SEEN)
+UOTHER(JG_SEMKATH)
+UOTHER(JG_SHIN)
+UOTHER(JG_SWASH_KAF)
+UOTHER(JG_SYRIAC_WAW)
+UOTHER(JG_TAH)
+UOTHER(JG_TAW)
+UOTHER(JG_TEH_MARBUTA)
+UOTHER(JG_TETH)
+UOTHER(JG_WAW)
+UOTHER(JG_YEH)
+UOTHER(JG_YEH_BARREE)
+UOTHER(JG_YEH_WITH_TAIL)
+UOTHER(JG_YUDH)
+UOTHER(JG_YUDH_HE)
+UOTHER(JG_ZAIN)
+UOTHER(JG_FE)
+UOTHER(JG_KHAPH)
+UOTHER(JG_ZHAIN)
+UOTHER(JG_BURUSHASKI_YEH_BARREE)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44
+UOTHER(JG_FARSI_YEH)
+UOTHER(JG_NYA)
+#endif /* ICU >= 4.4 */
+#if U_ICU_VERSION_MAJOR_NUM >= 49
+UOTHER(JG_ROHINGYA_YEH)
+#endif /* ICU >= 49 */
+#if U_ICU_VERSION_MAJOR_NUM >= 54
+UOTHER(JG_MANICHAEAN_ALEPH)
+UOTHER(JG_MANICHAEAN_AYIN)
+UOTHER(JG_MANICHAEAN_BETH)
+UOTHER(JG_MANICHAEAN_DALETH)
+UOTHER(JG_MANICHAEAN_DHAMEDH)
+UOTHER(JG_MANICHAEAN_FIVE)
+UOTHER(JG_MANICHAEAN_GIMEL)
+UOTHER(JG_MANICHAEAN_HETH)
+UOTHER(JG_MANICHAEAN_HUNDRED)
+UOTHER(JG_MANICHAEAN_KAPH)
+UOTHER(JG_MANICHAEAN_LAMEDH)
+UOTHER(JG_MANICHAEAN_MEM)
+UOTHER(JG_MANICHAEAN_NUN)
+UOTHER(JG_MANICHAEAN_ONE)
+UOTHER(JG_MANICHAEAN_PE)
+UOTHER(JG_MANICHAEAN_QOPH)
+UOTHER(JG_MANICHAEAN_RESH)
+UOTHER(JG_MANICHAEAN_SADHE)
+UOTHER(JG_MANICHAEAN_SAMEKH)
+UOTHER(JG_MANICHAEAN_TAW)
+UOTHER(JG_MANICHAEAN_TEN)
+UOTHER(JG_MANICHAEAN_TETH)
+UOTHER(JG_MANICHAEAN_THAMEDH)
+UOTHER(JG_MANICHAEAN_TWENTY)
+UOTHER(JG_MANICHAEAN_WAW)
+UOTHER(JG_MANICHAEAN_YODH)
+UOTHER(JG_MANICHAEAN_ZAYIN)
+UOTHER(JG_STRAIGHT_WAW)
+#endif /* ICU >= 54 */
+UOTHER(JG_COUNT )
+
+/* UGraphemeClusterBreak - http://icu-project.org/apiref/icu4c/uchar_8h.html#abb9bae7d2a1c80ce342be4647661fde1 */
+UOTHER(GCB_OTHER)
+UOTHER(GCB_CONTROL)
+UOTHER(GCB_CR)
+UOTHER(GCB_EXTEND)
+UOTHER(GCB_L)
+UOTHER(GCB_LF)
+UOTHER(GCB_LV)
+UOTHER(GCB_LVT)
+UOTHER(GCB_T)
+UOTHER(GCB_V)
+UOTHER(GCB_SPACING_MARK)
+UOTHER(GCB_PREPEND)
+#if U_ICU_VERSION_MAJOR_NUM >= 50
+UOTHER(GCB_REGIONAL_INDICATOR)
+#endif /* ICU >= 50 */
+UOTHER(GCB_COUNT)
+
+/* UWordBreakValues - http://icu-project.org/apiref/icu4c/uchar_8h.html#af70ee907368e663f8dd4b90c7196e15c */
+UOTHER(WB_OTHER)
+UOTHER(WB_ALETTER)
+UOTHER(WB_FORMAT)
+UOTHER(WB_KATAKANA)
+UOTHER(WB_MIDLETTER)
+UOTHER(WB_MIDNUM)
+UOTHER(WB_NUMERIC)
+UOTHER(WB_EXTENDNUMLET)
+UOTHER(WB_CR)
+UOTHER(WB_EXTEND)
+UOTHER(WB_LF)
+UOTHER(WB_MIDNUMLET)
+UOTHER(WB_NEWLINE)
+#if U_ICU_VERSION_MAJOR_NUM >= 50
+UOTHER(WB_REGIONAL_INDICATOR)
+#endif /* ICU >= 50 */
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+UOTHER(WB_HEBREW_LETTER)
+UOTHER(WB_SINGLE_QUOTE)
+UOTHER(WB_DOUBLE_QUOTE)
+#endif /* ICU >= 52 */
+UOTHER(WB_COUNT)
+
+/* USentenceBreak - http://icu-project.org/apiref/icu4c/uchar_8h.html#a89e9e463c3bae1d2d46b1dbb6f90de0f */
+UOTHER(SB_OTHER)
+UOTHER(SB_ATERM)
+UOTHER(SB_CLOSE)
+UOTHER(SB_FORMAT)
+UOTHER(SB_LOWER)
+UOTHER(SB_NUMERIC)
+UOTHER(SB_OLETTER)
+UOTHER(SB_SEP)
+UOTHER(SB_SP)
+UOTHER(SB_STERM)
+UOTHER(SB_UPPER)
+UOTHER(SB_CR)
+UOTHER(SB_EXTEND)
+UOTHER(SB_LF)
+UOTHER(SB_SCONTINUE)
+UOTHER(SB_COUNT)
+
+/* ULineBreak - http://icu-project.org/apiref/icu4c/uchar_8h.html#a5d1abdf05be22cb9599f804a8506277c */
+UOTHER(LB_UNKNOWN)
+UOTHER(LB_AMBIGUOUS)
+UOTHER(LB_ALPHABETIC)
+UOTHER(LB_BREAK_BOTH)
+UOTHER(LB_BREAK_AFTER)
+UOTHER(LB_BREAK_BEFORE)
+UOTHER(LB_MANDATORY_BREAK)
+UOTHER(LB_CONTINGENT_BREAK)
+UOTHER(LB_CLOSE_PUNCTUATION)
+UOTHER(LB_COMBINING_MARK)
+UOTHER(LB_CARRIAGE_RETURN)
+UOTHER(LB_EXCLAMATION)
+UOTHER(LB_GLUE)
+UOTHER(LB_HYPHEN)
+UOTHER(LB_IDEOGRAPHIC)
+UOTHER(LB_INSEPARABLE)
+/* NOTE(review): LB_INSEPERABLE (sic) looks like ICU's legacy spelling kept alongside LB_INSEPARABLE - confirm before removing */
+UOTHER(LB_INSEPERABLE)
+UOTHER(LB_INFIX_NUMERIC)
+UOTHER(LB_LINE_FEED)
+UOTHER(LB_NONSTARTER)
+UOTHER(LB_NUMERIC)
+UOTHER(LB_OPEN_PUNCTUATION)
+UOTHER(LB_POSTFIX_NUMERIC)
+UOTHER(LB_PREFIX_NUMERIC)
+UOTHER(LB_QUOTATION)
+UOTHER(LB_COMPLEX_CONTEXT)
+UOTHER(LB_SURROGATE)
+UOTHER(LB_SPACE)
+UOTHER(LB_BREAK_SYMBOLS)
+UOTHER(LB_ZWSPACE)
+UOTHER(LB_NEXT_LINE)
+UOTHER(LB_WORD_JOINER)
+UOTHER(LB_H2)
+UOTHER(LB_H3)
+UOTHER(LB_JL)
+UOTHER(LB_JT)
+UOTHER(LB_JV)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44
+UOTHER(LB_CLOSE_PARENTHESIS)
+#endif /* ICU >= 4.4 */
+#if U_ICU_VERSION_MAJOR_NUM >= 49
+UOTHER(LB_CONDITIONAL_JAPANESE_STARTER)
+UOTHER(LB_HEBREW_LETTER)
+#endif /* ICU >= 49 */
+#if U_ICU_VERSION_MAJOR_NUM >= 50
+UOTHER(LB_REGIONAL_INDICATOR)
+#endif /* ICU >= 50 */
+UOTHER(LB_COUNT)
+
+/* UNumericType - http://icu-project.org/apiref/icu4c/uchar_8h.html#adec3e7a6ae3a00274c019b3b2ddaecbe */
+UOTHER(NT_NONE)
+UOTHER(NT_DECIMAL)
+UOTHER(NT_DIGIT)
+UOTHER(NT_NUMERIC)
+UOTHER(NT_COUNT)
+
+/* UHangulSyllableType - http://icu-project.org/apiref/icu4c/uchar_8h.html#a7cb09027c37ad73571cf541caf002c8f */
+UOTHER(HST_NOT_APPLICABLE)
+UOTHER(HST_LEADING_JAMO)
+UOTHER(HST_VOWEL_JAMO)
+UOTHER(HST_TRAILING_JAMO)
+UOTHER(HST_LV_SYLLABLE)
+UOTHER(HST_LVT_SYLLABLE)
+UOTHER(HST_COUNT )
diff --git a/ext/intl/uchar/uproperty-enum.h b/ext/intl/uchar/uproperty-enum.h
new file mode 100644
index 0000000000..3b7b80f365
--- /dev/null
+++ b/ext/intl/uchar/uproperty-enum.h
@@ -0,0 +1,118 @@
+/* Generated from http://icu-project.org/apiref/icu4c/uchar_8h.html#ae40d616419e74ecc7c80a9febab03199
+ *
+ * X-macro list: php_uchar_minit() in uchar.c includes this file with
+ * UPROPERTY(name) defined so that each entry declares the class constant
+ * IntlChar::PROPERTY_<name> with the value of ICU's UCHAR_<name>.
+ * Entries inside #if blocks are only declared when building against an
+ * ICU version matching the guard; 4.x releases are tested as
+ * major * 10 + minor.
+ */
+UPROPERTY(ALPHABETIC)
+UPROPERTY(BINARY_START)
+UPROPERTY(ASCII_HEX_DIGIT)
+UPROPERTY(BIDI_CONTROL)
+UPROPERTY(BIDI_MIRRORED)
+UPROPERTY(DASH)
+UPROPERTY(DEFAULT_IGNORABLE_CODE_POINT)
+UPROPERTY(DEPRECATED)
+UPROPERTY(DIACRITIC)
+UPROPERTY(EXTENDER)
+UPROPERTY(FULL_COMPOSITION_EXCLUSION)
+UPROPERTY(GRAPHEME_BASE)
+UPROPERTY(GRAPHEME_EXTEND)
+UPROPERTY(GRAPHEME_LINK)
+UPROPERTY(HEX_DIGIT)
+UPROPERTY(HYPHEN)
+UPROPERTY(ID_CONTINUE)
+UPROPERTY(ID_START)
+UPROPERTY(IDEOGRAPHIC)
+UPROPERTY(IDS_BINARY_OPERATOR)
+UPROPERTY(IDS_TRINARY_OPERATOR)
+UPROPERTY(JOIN_CONTROL)
+UPROPERTY(LOGICAL_ORDER_EXCEPTION)
+UPROPERTY(LOWERCASE)
+UPROPERTY(MATH)
+UPROPERTY(NONCHARACTER_CODE_POINT)
+UPROPERTY(QUOTATION_MARK)
+UPROPERTY(RADICAL)
+UPROPERTY(SOFT_DOTTED)
+UPROPERTY(TERMINAL_PUNCTUATION)
+UPROPERTY(UNIFIED_IDEOGRAPH)
+UPROPERTY(UPPERCASE)
+UPROPERTY(WHITE_SPACE)
+UPROPERTY(XID_CONTINUE)
+UPROPERTY(XID_START)
+UPROPERTY(CASE_SENSITIVE)
+UPROPERTY(S_TERM)
+UPROPERTY(VARIATION_SELECTOR)
+UPROPERTY(NFD_INERT)
+UPROPERTY(NFKD_INERT)
+UPROPERTY(NFC_INERT)
+UPROPERTY(NFKC_INERT)
+UPROPERTY(SEGMENT_STARTER)
+UPROPERTY(PATTERN_SYNTAX)
+UPROPERTY(PATTERN_WHITE_SPACE)
+UPROPERTY(POSIX_ALNUM)
+UPROPERTY(POSIX_BLANK)
+UPROPERTY(POSIX_GRAPH)
+UPROPERTY(POSIX_PRINT)
+UPROPERTY(POSIX_XDIGIT)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44
+UPROPERTY(CASED)
+UPROPERTY(CASE_IGNORABLE)
+UPROPERTY(CHANGES_WHEN_LOWERCASED)
+UPROPERTY(CHANGES_WHEN_UPPERCASED)
+UPROPERTY(CHANGES_WHEN_TITLECASED)
+UPROPERTY(CHANGES_WHEN_CASEFOLDED)
+UPROPERTY(CHANGES_WHEN_CASEMAPPED)
+UPROPERTY(CHANGES_WHEN_NFKC_CASEFOLDED)
+#endif /* ICU >= 4.4 */
+UPROPERTY(BINARY_LIMIT)
+UPROPERTY(BIDI_CLASS)
+UPROPERTY(INT_START)
+UPROPERTY(BLOCK)
+UPROPERTY(CANONICAL_COMBINING_CLASS)
+UPROPERTY(DECOMPOSITION_TYPE)
+UPROPERTY(EAST_ASIAN_WIDTH)
+UPROPERTY(GENERAL_CATEGORY)
+UPROPERTY(JOINING_GROUP)
+UPROPERTY(JOINING_TYPE)
+UPROPERTY(LINE_BREAK)
+UPROPERTY(NUMERIC_TYPE)
+UPROPERTY(SCRIPT)
+UPROPERTY(HANGUL_SYLLABLE_TYPE)
+UPROPERTY(NFD_QUICK_CHECK)
+UPROPERTY(NFKD_QUICK_CHECK)
+UPROPERTY(NFC_QUICK_CHECK)
+UPROPERTY(NFKC_QUICK_CHECK)
+UPROPERTY(LEAD_CANONICAL_COMBINING_CLASS)
+UPROPERTY(TRAIL_CANONICAL_COMBINING_CLASS)
+UPROPERTY(GRAPHEME_CLUSTER_BREAK)
+UPROPERTY(SENTENCE_BREAK)
+UPROPERTY(WORD_BREAK)
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+UPROPERTY(BIDI_PAIRED_BRACKET_TYPE)
+#endif /* ICU >= 52 */
+UPROPERTY(INT_LIMIT)
+UPROPERTY(GENERAL_CATEGORY_MASK)
+UPROPERTY(MASK_START)
+UPROPERTY(MASK_LIMIT)
+UPROPERTY(NUMERIC_VALUE)
+UPROPERTY(DOUBLE_START)
+UPROPERTY(DOUBLE_LIMIT)
+UPROPERTY(AGE)
+UPROPERTY(STRING_START)
+UPROPERTY(BIDI_MIRRORING_GLYPH)
+UPROPERTY(CASE_FOLDING)
+UPROPERTY(ISO_COMMENT)
+UPROPERTY(LOWERCASE_MAPPING)
+UPROPERTY(NAME)
+UPROPERTY(SIMPLE_CASE_FOLDING)
+UPROPERTY(SIMPLE_LOWERCASE_MAPPING)
+UPROPERTY(SIMPLE_TITLECASE_MAPPING)
+UPROPERTY(SIMPLE_UPPERCASE_MAPPING)
+UPROPERTY(TITLECASE_MAPPING)
+UPROPERTY(UNICODE_1_NAME)
+UPROPERTY(UPPERCASE_MAPPING)
+#if U_ICU_VERSION_MAJOR_NUM >= 52
+UPROPERTY(BIDI_PAIRED_BRACKET)
+#endif /* ICU >= 52 */
+UPROPERTY(STRING_LIMIT)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 46
+UPROPERTY(SCRIPT_EXTENSIONS)
+UPROPERTY(OTHER_PROPERTY_START)
+UPROPERTY(OTHER_PROPERTY_LIMIT)
+#endif /* ICU >= 4.6 */
+UPROPERTY(INVALID_CODE)