diff options
author | Sara Golemon <pollita@php.net> | 2014-12-15 14:29:52 -0800 |
---|---|---|
committer | Sara Golemon <sgolemon@fb.com> | 2015-01-16 14:37:44 -0800 |
commit | ebb60ac7dd179a3bea540d50a7d595010a82a656 (patch) | |
tree | 4a55cfedf9f3ad7fb156ab2025d8d224f7a14bc0 /ext | |
parent | 22bb3ce61a895f77178078da4a7e29cb39a532a7 (diff) | |
download | php-git-ebb60ac7dd179a3bea540d50a7d595010a82a656.tar.gz |
Add IntlChar class to intl extension
Exposes ICU's uchar functionality as a set of static methods
Diffstat (limited to 'ext')
-rw-r--r-- | ext/intl/config.m4 | 2 | ||||
-rw-r--r-- | ext/intl/config.w32 | 3 | ||||
-rw-r--r-- | ext/intl/php_intl.c | 4 | ||||
-rw-r--r-- | ext/intl/uchar/tests/basic-functionality.phpt | 153 | ||||
-rw-r--r-- | ext/intl/uchar/ublockcode-enum.h | 266 | ||||
-rw-r--r-- | ext/intl/uchar/uchar.c | 769 | ||||
-rw-r--r-- | ext/intl/uchar/uchar.h | 8 | ||||
-rw-r--r-- | ext/intl/uchar/ucharcategory-enum.h | 33 | ||||
-rw-r--r-- | ext/intl/uchar/uchardirection-enum.h | 26 | ||||
-rw-r--r-- | ext/intl/uchar/uother-enum.h | 284 | ||||
-rw-r--r-- | ext/intl/uchar/uproperty-enum.h | 118 |
11 files changed, 1666 insertions, 0 deletions
diff --git a/ext/intl/config.m4 b/ext/intl/config.m4 index 0fbbd0f786..dd2827621e 100644 --- a/ext/intl/config.m4 +++ b/ext/intl/config.m4 @@ -84,6 +84,7 @@ if test "$PHP_INTL" != "no"; then breakiterator/rulebasedbreakiterator_methods.cpp \ breakiterator/codepointiterator_internal.cpp \ breakiterator/codepointiterator_methods.cpp \ + uchar/uchar.c \ idn/idn.c \ $icu_spoof_src, $ext_shared,,$ICU_INCS -Wno-write-strings -D__STDC_LIMIT_MACROS -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1) PHP_ADD_BUILD_DIR($ext_builddir/collator) @@ -102,4 +103,5 @@ if test "$PHP_INTL" != "no"; then PHP_ADD_BUILD_DIR($ext_builddir/idn) PHP_ADD_BUILD_DIR($ext_builddir/spoofchecker) PHP_ADD_BUILD_DIR($ext_builddir/breakiterator) + PHP_ADD_BUILD_DIR($ext_builddir/uchar) fi diff --git a/ext/intl/config.w32 b/ext/intl/config.w32 index 4628e434f7..76bd19177b 100644 --- a/ext/intl/config.w32 +++ b/ext/intl/config.w32 @@ -72,6 +72,9 @@ if (PHP_INTL != "no") { dateformat_helpers.cpp \ dateformat_create.cpp \ ", "intl"); + ADD_SOURCES(configure_module_dirname + "/uchar", "\ + uchar.c", + "intl"); ADD_SOURCES(configure_module_dirname + "/idn", "\ idn.c", "intl"); diff --git a/ext/intl/php_intl.c b/ext/intl/php_intl.c index 3e45e48016..aef6dc15c7 100644 --- a/ext/intl/php_intl.c +++ b/ext/intl/php_intl.c @@ -85,6 +85,7 @@ #include "breakiterator/breakiterator_iterators.h" #include "idn/idn.h" +#include "uchar/uchar.h" #if U_ICU_VERSION_MAJOR_NUM * 1000 + U_ICU_VERSION_MINOR_NUM >= 4002 # include "spoofchecker/spoofchecker_class.h" @@ -1003,6 +1004,9 @@ PHP_MINIT_FUNCTION( intl ) /* 'Converter' class for codepage conversions */ php_converter_minit(INIT_FUNC_ARGS_PASSTHRU); + /* IntlChar class */ + php_uchar_minit(INIT_FUNC_ARGS_PASSTHRU); + return SUCCESS; } /* }}} */ diff --git a/ext/intl/uchar/tests/basic-functionality.phpt b/ext/intl/uchar/tests/basic-functionality.phpt new file mode 100644 index 0000000000..eb8e4fb261 --- /dev/null +++ b/ext/intl/uchar/tests/basic-functionality.phpt @@ -0,0 +1,153 @@ 
+--TEST-- +IntlChar basic functionality +--FILE-- +<?php + +function unicode_info($cp) { + $proplist = array( + IntlChar::PROPERTY_ALPHABETIC, + ); + $methodList = array( + 'isUAlphabetic', + 'isUUppercase', 'isupper', + 'isULowercase', 'islower', + 'isUWhiteSpace', 'isWhitespace', + 'istitle', 'isdigit', 'isalpha', 'isalnum', + 'isxdigit', 'ispunct', 'ispunct', 'isgraph', + 'isblank', 'isdefined', 'isspace', 'iscntrl', + 'isMirrored', 'isIDStart', 'isIDPart', + 'getBlockCode', 'charName', + ); + + $ncp = IntlChar::ord($cp); + printf("Codepoint U+%04x\n", $ncp); + + foreach($proplist as $prop) { + printf(" hasBinaryProperty(%s): %s\n", + IntlChar::getPropertyName($prop), + IntlChar::hasBinaryProperty($cp, $prop) ? "true" : "false" + ); + } + foreach($methodList as $method) { + echo " $method(): "; + var_dump(IntlChar::$method($cp)); + } + echo " charAge(): ", implode('.', IntlChar::charAge($cp)), "\n"; + echo "\n"; +} + +printf("Codepoint range: %04x-%04x\n", IntlChar::CODEPOINT_MIN, IntlChar::CODEPOINT_MAX); +$codepoints = array('P', 0xDF, 0x2603); +foreach($codepoints as $cp) { + unicode_info($cp); +} +echo "Sample range of codepoints: U+2600-U+260F\n"; +IntlChar::enumCharNames(0x2600, 0x2610, function($cp, $nc, $name) { + printf("U+%04x %s\n", $cp, $name); +}); +echo "RECYCLING SYMBOL FOR TYPE-1 PLASTICS => "; +var_dump(IntlChar::charFromName("RECYCLING SYMBOL FOR TYPE-1 PLASTICS")); +--EXPECT-- +Codepoint range: 0000-10ffff +Codepoint U+0050 + hasBinaryProperty(Alphabetic): true + isUAlphabetic(): bool(true) + isUUppercase(): bool(true) + isupper(): bool(true) + isULowercase(): bool(false) + islower(): bool(false) + isUWhiteSpace(): bool(false) + isWhitespace(): bool(false) + istitle(): bool(false) + isdigit(): bool(false) + isalpha(): bool(true) + isalnum(): bool(true) + isxdigit(): bool(false) + ispunct(): bool(false) + ispunct(): bool(false) + isgraph(): bool(true) + isblank(): bool(false) + isdefined(): bool(true) + isspace(): bool(false) + iscntrl(): 
bool(false) + isMirrored(): bool(false) + isIDStart(): bool(true) + isIDPart(): bool(true) + getBlockCode(): int(1) + charName(): string(22) "LATIN CAPITAL LETTER P" + charAge(): 1.1.0.0 + +Codepoint U+00df + hasBinaryProperty(Alphabetic): true + isUAlphabetic(): bool(true) + isUUppercase(): bool(false) + isupper(): bool(false) + isULowercase(): bool(true) + islower(): bool(true) + isUWhiteSpace(): bool(false) + isWhitespace(): bool(false) + istitle(): bool(false) + isdigit(): bool(false) + isalpha(): bool(true) + isalnum(): bool(true) + isxdigit(): bool(false) + ispunct(): bool(false) + ispunct(): bool(false) + isgraph(): bool(true) + isblank(): bool(false) + isdefined(): bool(true) + isspace(): bool(false) + iscntrl(): bool(false) + isMirrored(): bool(false) + isIDStart(): bool(true) + isIDPart(): bool(true) + getBlockCode(): int(2) + charName(): string(26) "LATIN SMALL LETTER SHARP S" + charAge(): 1.1.0.0 + +Codepoint U+2603 + hasBinaryProperty(Alphabetic): false + isUAlphabetic(): bool(false) + isUUppercase(): bool(false) + isupper(): bool(false) + isULowercase(): bool(false) + islower(): bool(false) + isUWhiteSpace(): bool(false) + isWhitespace(): bool(false) + istitle(): bool(false) + isdigit(): bool(false) + isalpha(): bool(false) + isalnum(): bool(false) + isxdigit(): bool(false) + ispunct(): bool(false) + ispunct(): bool(false) + isgraph(): bool(true) + isblank(): bool(false) + isdefined(): bool(true) + isspace(): bool(false) + iscntrl(): bool(false) + isMirrored(): bool(false) + isIDStart(): bool(false) + isIDPart(): bool(false) + getBlockCode(): int(55) + charName(): string(7) "SNOWMAN" + charAge(): 1.1.0.0 + +Sample range of codepoints: U+2600-U+260F +U+2600 BLACK SUN WITH RAYS +U+2601 CLOUD +U+2602 UMBRELLA +U+2603 SNOWMAN +U+2604 COMET +U+2605 BLACK STAR +U+2606 WHITE STAR +U+2607 LIGHTNING +U+2608 THUNDERSTORM +U+2609 SUN +U+260a ASCENDING NODE +U+260b DESCENDING NODE +U+260c CONJUNCTION +U+260d OPPOSITION +U+260e BLACK TELEPHONE +U+260f WHITE 
TELEPHONE +RECYCLING SYMBOL FOR TYPE-1 PLASTICS => int(9843) diff --git a/ext/intl/uchar/ublockcode-enum.h b/ext/intl/uchar/ublockcode-enum.h new file mode 100644 index 0000000000..aaed7f12ed --- /dev/null +++ b/ext/intl/uchar/ublockcode-enum.h @@ -0,0 +1,266 @@ +/* UBlockCode http://icu-project.org/apiref/icu4c/uchar_8h.html#a19f1662c9c33c31593612d05777ea901*/ +UBLOCKCODE(NO_BLOCK) +UBLOCKCODE(BASIC_LATIN) +UBLOCKCODE(LATIN_1_SUPPLEMENT) +UBLOCKCODE(LATIN_EXTENDED_A) +UBLOCKCODE(LATIN_EXTENDED_B) +UBLOCKCODE(IPA_EXTENSIONS) +UBLOCKCODE(SPACING_MODIFIER_LETTERS) +UBLOCKCODE(COMBINING_DIACRITICAL_MARKS) +UBLOCKCODE(GREEK) +UBLOCKCODE(CYRILLIC) +UBLOCKCODE(ARMENIAN) +UBLOCKCODE(HEBREW) +UBLOCKCODE(ARABIC) +UBLOCKCODE(SYRIAC) +UBLOCKCODE(THAANA) +UBLOCKCODE(DEVANAGARI) +UBLOCKCODE(BENGALI) +UBLOCKCODE(GURMUKHI) +UBLOCKCODE(GUJARATI) +UBLOCKCODE(ORIYA) +UBLOCKCODE(TAMIL) +UBLOCKCODE(TELUGU) +UBLOCKCODE(KANNADA) +UBLOCKCODE(MALAYALAM) +UBLOCKCODE(SINHALA) +UBLOCKCODE(THAI) +UBLOCKCODE(LAO) +UBLOCKCODE(TIBETAN) +UBLOCKCODE(MYANMAR) +UBLOCKCODE(GEORGIAN) +UBLOCKCODE(HANGUL_JAMO) +UBLOCKCODE(ETHIOPIC) +UBLOCKCODE(CHEROKEE) +UBLOCKCODE(UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS) +UBLOCKCODE(OGHAM) +UBLOCKCODE(RUNIC) +UBLOCKCODE(KHMER) +UBLOCKCODE(MONGOLIAN) +UBLOCKCODE(LATIN_EXTENDED_ADDITIONAL) +UBLOCKCODE(GREEK_EXTENDED) +UBLOCKCODE(GENERAL_PUNCTUATION) +UBLOCKCODE(SUPERSCRIPTS_AND_SUBSCRIPTS) +UBLOCKCODE(CURRENCY_SYMBOLS) +UBLOCKCODE(COMBINING_MARKS_FOR_SYMBOLS) +UBLOCKCODE(LETTERLIKE_SYMBOLS) +UBLOCKCODE(NUMBER_FORMS) +UBLOCKCODE(ARROWS) +UBLOCKCODE(MATHEMATICAL_OPERATORS) +UBLOCKCODE(MISCELLANEOUS_TECHNICAL) +UBLOCKCODE(CONTROL_PICTURES) +UBLOCKCODE(OPTICAL_CHARACTER_RECOGNITION) +UBLOCKCODE(ENCLOSED_ALPHANUMERICS) +UBLOCKCODE(BOX_DRAWING) +UBLOCKCODE(BLOCK_ELEMENTS) +UBLOCKCODE(GEOMETRIC_SHAPES) +UBLOCKCODE(MISCELLANEOUS_SYMBOLS) +UBLOCKCODE(DINGBATS) +UBLOCKCODE(BRAILLE_PATTERNS) +UBLOCKCODE(CJK_RADICALS_SUPPLEMENT) +UBLOCKCODE(KANGXI_RADICALS) 
+UBLOCKCODE(IDEOGRAPHIC_DESCRIPTION_CHARACTERS) +UBLOCKCODE(CJK_SYMBOLS_AND_PUNCTUATION) +UBLOCKCODE(HIRAGANA) +UBLOCKCODE(KATAKANA) +UBLOCKCODE(BOPOMOFO) +UBLOCKCODE(HANGUL_COMPATIBILITY_JAMO) +UBLOCKCODE(KANBUN) +UBLOCKCODE(BOPOMOFO_EXTENDED) +UBLOCKCODE(ENCLOSED_CJK_LETTERS_AND_MONTHS) +UBLOCKCODE(CJK_COMPATIBILITY) +UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A) +UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS) +UBLOCKCODE(YI_SYLLABLES) +UBLOCKCODE(YI_RADICALS) +UBLOCKCODE(HANGUL_SYLLABLES) +UBLOCKCODE(HIGH_SURROGATES) +UBLOCKCODE(HIGH_PRIVATE_USE_SURROGATES) +UBLOCKCODE(LOW_SURROGATES) +UBLOCKCODE(PRIVATE_USE_AREA) +UBLOCKCODE(PRIVATE_USE) +UBLOCKCODE(CJK_COMPATIBILITY_IDEOGRAPHS) +UBLOCKCODE(ALPHABETIC_PRESENTATION_FORMS) +UBLOCKCODE(ARABIC_PRESENTATION_FORMS_A) +UBLOCKCODE(COMBINING_HALF_MARKS) +UBLOCKCODE(CJK_COMPATIBILITY_FORMS) +UBLOCKCODE(SMALL_FORM_VARIANTS) +UBLOCKCODE(ARABIC_PRESENTATION_FORMS_B) +UBLOCKCODE(SPECIALS) +UBLOCKCODE(HALFWIDTH_AND_FULLWIDTH_FORMS) +UBLOCKCODE(OLD_ITALIC) +UBLOCKCODE(GOTHIC) +UBLOCKCODE(DESERET) +UBLOCKCODE(BYZANTINE_MUSICAL_SYMBOLS) +UBLOCKCODE(MUSICAL_SYMBOLS) +UBLOCKCODE(MATHEMATICAL_ALPHANUMERIC_SYMBOLS) +UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B) +UBLOCKCODE(CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT) +UBLOCKCODE(TAGS) +UBLOCKCODE(CYRILLIC_SUPPLEMENT) +UBLOCKCODE(CYRILLIC_SUPPLEMENTARY) +UBLOCKCODE(TAGALOG) +UBLOCKCODE(HANUNOO) +UBLOCKCODE(BUHID) +UBLOCKCODE(TAGBANWA) +UBLOCKCODE(MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A) +UBLOCKCODE(SUPPLEMENTAL_ARROWS_A) +UBLOCKCODE(SUPPLEMENTAL_ARROWS_B) +UBLOCKCODE(MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B) +UBLOCKCODE(SUPPLEMENTAL_MATHEMATICAL_OPERATORS) +UBLOCKCODE(KATAKANA_PHONETIC_EXTENSIONS) +UBLOCKCODE(VARIATION_SELECTORS) +UBLOCKCODE(SUPPLEMENTARY_PRIVATE_USE_AREA_A) +UBLOCKCODE(SUPPLEMENTARY_PRIVATE_USE_AREA_B) +UBLOCKCODE(LIMBU) +UBLOCKCODE(TAI_LE) +UBLOCKCODE(KHMER_SYMBOLS) +UBLOCKCODE(PHONETIC_EXTENSIONS) +UBLOCKCODE(MISCELLANEOUS_SYMBOLS_AND_ARROWS) +UBLOCKCODE(YIJING_HEXAGRAM_SYMBOLS) 
+UBLOCKCODE(LINEAR_B_SYLLABARY) +UBLOCKCODE(LINEAR_B_IDEOGRAMS) +UBLOCKCODE(AEGEAN_NUMBERS) +UBLOCKCODE(UGARITIC) +UBLOCKCODE(SHAVIAN) +UBLOCKCODE(OSMANYA) +UBLOCKCODE(CYPRIOT_SYLLABARY) +UBLOCKCODE(TAI_XUAN_JING_SYMBOLS) +UBLOCKCODE(VARIATION_SELECTORS_SUPPLEMENT) +UBLOCKCODE(ANCIENT_GREEK_MUSICAL_NOTATION) +UBLOCKCODE(ANCIENT_GREEK_NUMBERS) +UBLOCKCODE(ARABIC_SUPPLEMENT) +UBLOCKCODE(BUGINESE) +UBLOCKCODE(CJK_STROKES) +UBLOCKCODE(COMBINING_DIACRITICAL_MARKS_SUPPLEMENT) +UBLOCKCODE(COPTIC) +UBLOCKCODE(ETHIOPIC_EXTENDED) +UBLOCKCODE(ETHIOPIC_SUPPLEMENT) +UBLOCKCODE(GEORGIAN_SUPPLEMENT) +UBLOCKCODE(GLAGOLITIC) +UBLOCKCODE(KHAROSHTHI) +UBLOCKCODE(MODIFIER_TONE_LETTERS) +UBLOCKCODE(NEW_TAI_LUE) +UBLOCKCODE(OLD_PERSIAN) +UBLOCKCODE(PHONETIC_EXTENSIONS_SUPPLEMENT) +UBLOCKCODE(SUPPLEMENTAL_PUNCTUATION) +UBLOCKCODE(SYLOTI_NAGRI) +UBLOCKCODE(TIFINAGH) +UBLOCKCODE(VERTICAL_FORMS) +UBLOCKCODE(NKO) +UBLOCKCODE(BALINESE) +UBLOCKCODE(LATIN_EXTENDED_C) +UBLOCKCODE(LATIN_EXTENDED_D) +UBLOCKCODE(PHAGS_PA) +UBLOCKCODE(PHOENICIAN) +UBLOCKCODE(CUNEIFORM) +UBLOCKCODE(CUNEIFORM_NUMBERS_AND_PUNCTUATION) +UBLOCKCODE(COUNTING_ROD_NUMERALS) +UBLOCKCODE(SUNDANESE) +UBLOCKCODE(LEPCHA) +UBLOCKCODE(OL_CHIKI) +UBLOCKCODE(CYRILLIC_EXTENDED_A) +UBLOCKCODE(VAI) +UBLOCKCODE(CYRILLIC_EXTENDED_B) +UBLOCKCODE(SAURASHTRA) +UBLOCKCODE(KAYAH_LI) +UBLOCKCODE(REJANG) +UBLOCKCODE(CHAM) +UBLOCKCODE(ANCIENT_SYMBOLS) +UBLOCKCODE(PHAISTOS_DISC) +UBLOCKCODE(LYCIAN) +UBLOCKCODE(CARIAN) +UBLOCKCODE(LYDIAN) +UBLOCKCODE(MAHJONG_TILES) +UBLOCKCODE(DOMINO_TILES) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44 +UBLOCKCODE(SAMARITAN) +UBLOCKCODE(UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED) +UBLOCKCODE(TAI_THAM) +UBLOCKCODE(VEDIC_EXTENSIONS) +UBLOCKCODE(LISU) +UBLOCKCODE(BAMUM) +UBLOCKCODE(COMMON_INDIC_NUMBER_FORMS) +UBLOCKCODE(DEVANAGARI_EXTENDED) +UBLOCKCODE(HANGUL_JAMO_EXTENDED_A) +UBLOCKCODE(JAVANESE) +UBLOCKCODE(MYANMAR_EXTENDED_A) +UBLOCKCODE(TAI_VIET) +UBLOCKCODE(MEETEI_MAYEK) 
+UBLOCKCODE(HANGUL_JAMO_EXTENDED_B) +UBLOCKCODE(IMPERIAL_ARAMAIC) +UBLOCKCODE(OLD_SOUTH_ARABIAN) +UBLOCKCODE(AVESTAN) +UBLOCKCODE(INSCRIPTIONAL_PARTHIAN) +UBLOCKCODE(INSCRIPTIONAL_PAHLAVI) +UBLOCKCODE(OLD_TURKIC) +UBLOCKCODE(RUMI_NUMERAL_SYMBOLS) +UBLOCKCODE(KAITHI) +UBLOCKCODE(EGYPTIAN_HIEROGLYPHS) +UBLOCKCODE(ENCLOSED_ALPHANUMERIC_SUPPLEMENT) +UBLOCKCODE(ENCLOSED_IDEOGRAPHIC_SUPPLEMENT) +UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C) +#endif /* ICU >= 4.4 */ +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 46 +UBLOCKCODE(MANDAIC) +UBLOCKCODE(BATAK) +UBLOCKCODE(ETHIOPIC_EXTENDED_A) +UBLOCKCODE(BRAHMI) +UBLOCKCODE(BAMUM_SUPPLEMENT) +UBLOCKCODE(KANA_SUPPLEMENT) +UBLOCKCODE(PLAYING_CARDS) +UBLOCKCODE(MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS) +UBLOCKCODE(EMOTICONS) +UBLOCKCODE(TRANSPORT_AND_MAP_SYMBOLS) +UBLOCKCODE(ALCHEMICAL_SYMBOLS) +UBLOCKCODE(CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D) +#endif /* ICU >= 4.6 */ +#if U_ICU_VERSION_MAJOR_NUM >= 49 +UBLOCKCODE(ARABIC_EXTENDED_A) +UBLOCKCODE(ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS) +UBLOCKCODE(CHAKMA) +UBLOCKCODE(MEETEI_MAYEK_EXTENSIONS) +UBLOCKCODE(MEROITIC_CURSIVE) +UBLOCKCODE(MEROITIC_HIEROGLYPHS) +UBLOCKCODE(MIAO) +UBLOCKCODE(SHARADA) +UBLOCKCODE(SORA_SOMPENG) +UBLOCKCODE(SUNDANESE_SUPPLEMENT) +UBLOCKCODE(TAKRI) +#endif /* ICU >= 49 */ +#if U_ICU_VERSION_MAJOR_NUM >= 54 +UBLOCKCODE(BASSA_VAH) +UBLOCKCODE(CAUCASIAN_ALBANIAN) +UBLOCKCODE(COPTIC_EPACT_NUMBERS) +UBLOCKCODE(COMBINING_DIACRITICAL_MARKS_EXTENDED) +UBLOCKCODE(DUPLOYAN) +UBLOCKCODE(ELBASAN) +UBLOCKCODE(GEOMETRIC_SHAPES_EXTENDED) +UBLOCKCODE(GRANTHA) +UBLOCKCODE(KHOJKI) +UBLOCKCODE(KHUDAWADI) +UBLOCKCODE(LATIN_EXTENDED_E) +UBLOCKCODE(LINEAR_A) +UBLOCKCODE(MAHAJANI) +UBLOCKCODE(MANICHAEAN) +UBLOCKCODE(MENDE_KIKAKUI) +UBLOCKCODE(MODI) +UBLOCKCODE(MRO) +UBLOCKCODE(MYANMAR_EXTENDED_B) +UBLOCKCODE(NABATAEAN) +UBLOCKCODE(OLD_NORTH_ARABIAN) +UBLOCKCODE(OLD_PERMIC) +UBLOCKCODE(ORNAMENTAL_DINGBATS) +UBLOCKCODE(PAHAWH_HMONG) +UBLOCKCODE(PALMYRENE) 
+UBLOCKCODE(PAU_CIN_HAU)
+UBLOCKCODE(PSALTER_PAHLAVI)
+UBLOCKCODE(SHORTHAND_FORMAT_CONTROLS)
+UBLOCKCODE(SIDDHAM)
+UBLOCKCODE(SINHALA_ARCHAIC_NUMBERS)
+UBLOCKCODE(SUPPLEMENTAL_ARROWS_C)
+UBLOCKCODE(TIRHUTA)
+UBLOCKCODE(WARANG_CITI)
+#endif /* ICU >= 54 */
+UBLOCKCODE(COUNT)
+UBLOCKCODE(INVALID_CODE)
diff --git a/ext/intl/uchar/uchar.c b/ext/intl/uchar/uchar.c
new file mode 100644
index 0000000000..b65417bc60
--- /dev/null
+++ b/ext/intl/uchar/uchar.c
@@ -0,0 +1,769 @@
+#include "uchar.h"
+#include "ext/intl/intl_data.h"
+#include "ext/intl/intl_convert.h"
+
+#include <unicode/uchar.h>
+
+#define IC_METHOD(mname) PHP_METHOD(IntlChar, mname)
+
+/* Convert a userland "codepoint" argument into a UChar32.
+ *
+ * zcp may be an integer (used as the code point directly) or a string that
+ * is exactly one UTF-8 encoded code point long. Any other type, a string of
+ * any other length, or a value outside UCHAR_MIN_VALUE..UCHAR_MAX_VALUE
+ * records U_ILLEGAL_ARGUMENT_ERROR in the global intl error and returns
+ * FAILURE. On SUCCESS the code point is stored in *pcp.
+ *
+ * Declared static: the helper is only used by the methods in this file, and
+ * a plain C99 `inline` definition does not provide an external definition
+ * for the linker to fall back on when the compiler declines to inline.
+ */
+static inline int convert_cp(UChar32* pcp, zval *zcp) {
+	zend_long cp = -1;
+	if (Z_TYPE_P(zcp) == IS_LONG) {
+		cp = Z_LVAL_P(zcp);
+	} else if (Z_TYPE_P(zcp) == IS_STRING) {
+		int32_t i = 0;
+		size_t zcp_len = Z_STRLEN_P(zcp);
+
+		/* A single code point never needs more than U8_MAX_LENGTH bytes.
+		 * Skipping the decode for longer input keeps the size_t length from
+		 * being truncated to the int32_t that U8_NEXT works with; i stays 0,
+		 * so the length check below still rejects the string.
+		 */
+		if (zcp_len <= U8_MAX_LENGTH) {
+			U8_NEXT(Z_STRVAL_P(zcp), i, (int32_t)zcp_len, cp);
+		}
+		if ((size_t)i != zcp_len) {
+			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+			intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
+			return FAILURE;
+		}
+	} else {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
+		return FAILURE;
+	}
+	/* Also catches the negative value U8_NEXT leaves in cp for ill-formed input */
+	if ((cp < UCHAR_MIN_VALUE) || (cp > UCHAR_MAX_VALUE)) {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
+		return FAILURE;
+	}
+	*pcp = (UChar32)cp;
+	return SUCCESS;
+}
+
+/* {{{ proto string IntlChar::chr(int|string $char)
+ * Converts a numeric codepoint to UTF-8
+ * Acts as an identity function when given a valid UTF-8 encoded codepoint
+ */
+ZEND_BEGIN_ARG_INFO_EX(chr_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO();
+IC_METHOD(chr) {
+	UChar32 cp;
+	zval *zcp;
+	char buffer[5];
+	int buffer_len = 0;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	/* We can use unsafe because we know the codepoint is in valid range
+	 * and that 4 bytes is enough for any unicode point
+	 */
+	U8_APPEND_UNSAFE(buffer, buffer_len, cp);
+	buffer[buffer_len] = 0;
+	RETURN_STRINGL(buffer, buffer_len);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::ord(int|string $codepoint)
+ * Converts a UTF-8 encoded codepoint to its integer U32 value
+ * Acts as an identity function when passed a valid integer codepoint
+ */
+ZEND_BEGIN_ARG_INFO_EX(ord_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, character)
+ZEND_END_ARG_INFO();
+IC_METHOD(ord) {
+	UChar32 cp;
+	zval *zcp;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	RETURN_LONG(cp);
+}
+/* }}} */
+
+/* {{{ proto bool IntlChar::hasBinaryProperty(int|string $char, int $property) */
+ZEND_BEGIN_ARG_INFO_EX(hasBinaryProperty_arginfo, 0, ZEND_RETURN_VALUE, 2)
+	ZEND_ARG_INFO(0, codepoint)
+	ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+IC_METHOD(hasBinaryProperty) {
+	UChar32 cp;
+	zend_long prop;
+	zval *zcp;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	RETURN_BOOL(u_hasBinaryProperty(cp, (UProperty)prop));
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getIntPropertyValue(int|string $char, int $property) */
+ZEND_BEGIN_ARG_INFO_EX(getIntPropertyValue_arginfo, 0, ZEND_RETURN_VALUE, 2)
+	ZEND_ARG_INFO(0, codepoint)
+	ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+IC_METHOD(getIntPropertyValue) {
+	UChar32 cp;
+	zend_long prop;
+	zval *zcp;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	RETURN_LONG(u_getIntPropertyValue(cp, (UProperty)prop));
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getIntPropertyMinValue(int $property) */
+ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMinValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+IC_METHOD(getIntPropertyMinValue) {
+	zend_long prop;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
+		return;
+	}
+
+	RETURN_LONG(u_getIntPropertyMinValue((UProperty)prop));
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getIntPropertyMaxValue(int $property) */
+ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMaxValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, property)
+ZEND_END_ARG_INFO();
+IC_METHOD(getIntPropertyMaxValue) {
+	zend_long prop;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
+		return;
+	}
+
+	RETURN_LONG(u_getIntPropertyMaxValue((UProperty)prop));
+}
+/* }}} */
+
+/* {{{ proto float IntlChar::getNumericValue(int|string $char) */
+ZEND_BEGIN_ARG_INFO_EX(getNumericValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO();
+IC_METHOD(getNumericValue) {
+	UChar32 cp;
+	zval *zcp;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	RETURN_DOUBLE(u_getNumericValue(cp));
+}
+/* }}} */
+
+/* {{{ proto void IntlChar::enumCharTypes(callable $cb) */
+/* The callback is mandatory (parsed with "f" below), so one required arg */
+ZEND_BEGIN_ARG_INFO_EX(enumCharTypes_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, callback)
+ZEND_END_ARG_INFO();
+typedef struct _enumCharType_data {
+	zend_fcall_info fci;
+	zend_fcall_info_cache fci_cache;
+} enumCharType_data;
+/* Trampoline handed to u_enumCharTypes(): forwards one (start, limit, type)
+ * range to the userland callable held in context.
+ * Returns 1 after a successful call, 0 if the call itself failed.
+ */
+static UBool enumCharType_callback(enumCharType_data *context,
+                                   UChar32 start, UChar32 limit,
+                                   UCharCategory type) {
+	zval retval;
+	zval args[3];
+
+	ZVAL_NULL(&retval);
+	/* Note that $start is INclusive, while $limit is EXclusive
+	 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
+	 */
+	ZVAL_LONG(&args[0], start);
+	ZVAL_LONG(&args[1], limit);
+	ZVAL_LONG(&args[2], type);
+
+	context->fci.retval = &retval;
+	context->fci.param_count = 3;
+	context->fci.params = args;
+
+	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
+		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
+		/* Same global-error helper as the enumCharNames callback uses */
+		intl_error_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
+		zval_dtor(&retval);
+		return 0;
+	}
+	zval_dtor(&retval);
+	return 1;
+}
+IC_METHOD(enumCharTypes) {
+	enumCharType_data context;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "f", &context.fci, &context.fci_cache) == FAILURE) {
+		return;
+	}
+	u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getBlockCode(int|string $char) */
+ZEND_BEGIN_ARG_INFO_EX(getBlockCode_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, codepoint)
+ZEND_END_ARG_INFO()
+IC_METHOD(getBlockCode) {
+	UChar32 cp;
+	zval *zcp;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	RETURN_LONG(ublock_getCode(cp));
+}
+/* }}} */
+
+/* {{{ proto string IntlChar::charName(int|string $char, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(charName_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, codepoint)
+	ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO()
+IC_METHOD(charName) {
+	UChar32 cp;
+	zval *zcp;
+	UErrorCode error = U_ZERO_ERROR;
+	zend_long nameChoice = U_UNICODE_CHAR_NAME;
+	zend_string *buffer = NULL;
+	int32_t buffer_len;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &nameChoice) == FAILURE) ||
+	    (convert_cp(&cp, zcp) == FAILURE)) {
+		return;
+	}
+
+	/* First call only measures the name (NULL buffer, zero capacity);
+	 * the error it reports is discarded before the real call.
+	 */
+	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, NULL, 0, &error);
+	buffer = zend_string_alloc(buffer_len, 0);
+	error = U_ZERO_ERROR;
+	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, buffer->val, buffer->len + 1, &error);
+	if (U_FAILURE(error)) {
+		zend_string_free(buffer);
+		INTL_CHECK_STATUS(error, "Failure getting character name");
+	}
+	RETURN_NEW_STR(buffer);
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::charFromName(string $name, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(charFromName_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, characterName)
+	ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO()
+IC_METHOD(charFromName) {
+	char *name;
+	size_t name_len;
+	zend_long nameChoice = U_UNICODE_CHAR_NAME;
+	UChar32 ret;
+	UErrorCode error = U_ZERO_ERROR;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &name, &name_len, &nameChoice) == FAILURE) {
+		return;
+	}
+
+	ret = u_charFromName((UCharNameChoice)nameChoice, name, &error);
+	INTL_CHECK_STATUS(error, NULL);
+	RETURN_LONG(ret);
+}
+/* }}} */
+
+/* {{{ proto void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(enumCharNames_arginfo, 0, ZEND_RETURN_VALUE, 3)
+	ZEND_ARG_INFO(0, start)
+	ZEND_ARG_INFO(0, limit)
+	ZEND_ARG_INFO(0, callback)
+	ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO();
+typedef struct _enumCharNames_data {
+	zend_fcall_info fci;
+	zend_fcall_info_cache fci_cache;
+} enumCharNames_data;
+/* Trampoline handed to u_enumCharNames(): forwards one
+ * (codepoint, nameChoice, name) triple to the userland callable.
+ * Returns 1 after a successful call, 0 if the call itself failed.
+ */
+static UBool enumCharNames_callback(enumCharNames_data *context,
+                                    UChar32 code, UCharNameChoice nameChoice,
+                                    const char *name, int32_t length) {
+	zval retval;
+	zval args[3];
+
+	ZVAL_NULL(&retval);
+	ZVAL_LONG(&args[0], code);
+	ZVAL_LONG(&args[1], nameChoice);
+	ZVAL_STRINGL(&args[2], name, length);
+
+	context->fci.retval = &retval;
+	context->fci.param_count = 3;
+	context->fci.params = args;
+
+	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
+		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
+		intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
+		zval_dtor(&retval);
+		zval_dtor(&args[2]);
+		return 0;
+	}
+	zval_dtor(&retval);
+	/* args[2] holds the string created above; release it on every path */
+	zval_dtor(&args[2]);
+	return 1;
+}
+IC_METHOD(enumCharNames) {
+	UChar32 start, limit;
+	zval *zstart, *zlimit;
+	enumCharNames_data context;
+	zend_long nameChoice = U_UNICODE_CHAR_NAME;
+	UErrorCode error = U_ZERO_ERROR;
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zzf|l", &zstart, &zlimit, &context.fci, &context.fci_cache, &nameChoice) == FAILURE) ||
+	    (convert_cp(&start, zstart) == FAILURE) ||
+	    (convert_cp(&limit, zlimit) == FAILURE)) {
+		return;
+	}
+
+	u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &error);
+	INTL_CHECK_STATUS(error, NULL);
+}
+/* }}} */
+
+/* {{{ proto string IntlChar::getPropertyName(int $property, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
+ZEND_BEGIN_ARG_INFO_EX(getPropertyName_arginfo, 0, ZEND_RETURN_VALUE, 1)
+	ZEND_ARG_INFO(0, property)
+	ZEND_ARG_INFO(0, nameChoice)
+ZEND_END_ARG_INFO();
+IC_METHOD(getPropertyName) {
+	zend_long property;
+	zend_long nameChoice = U_LONG_PROPERTY_NAME;
+	const char *ret;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &property, &nameChoice) == FAILURE) {
+		return;
+	}
+
+	ret = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
+	if (ret) {
+		RETURN_STRING(ret);
+	} else {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
+		RETURN_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto int IntlChar::getPropertyEnum(string $alias) */
+ZEND_BEGIN_ARG_INFO_EX(getPropertyEnum_arginfo, 0, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, alias) +ZEND_END_ARG_INFO(); +IC_METHOD(getPropertyEnum) { + char *alias; + size_t alias_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &alias, &alias_len) == FAILURE) { + return; + } + + RETURN_LONG(u_getPropertyEnum(alias)); +} +/* }}} */ + +/* {{{ proto string IntlChar::getPropertyValueName(int $prop, int $val[, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */ +ZEND_BEGIN_ARG_INFO_EX(getPropertyValueName_arginfo, 0, ZEND_RETURN_VALUE, 2) + ZEND_ARG_INFO(0, property) + ZEND_ARG_INFO(0, value) + ZEND_ARG_INFO(0, nameChoice) +ZEND_END_ARG_INFO(); +IC_METHOD(getPropertyValueName) { + zend_long property, value, nameChoice = U_LONG_PROPERTY_NAME; + const char *ret; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll|l", &property, &value, &nameChoice) == FAILURE) { + return; + } + + ret = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice); + if (ret) { + RETURN_STRING(ret); + } else { + intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR); + intl_error_set_custom_msg(NULL, "Failed to get property name", 0); + RETURN_FALSE; + } +} +/* }}} */ + +/* {{{ proto int IntlChar::getPropertyValueEnum(int $property, string $name) */ +ZEND_BEGIN_ARG_INFO_EX(getPropertyValueEnum_arginfo, 0, ZEND_RETURN_VALUE, 2) + ZEND_ARG_INFO(0, property) + ZEND_ARG_INFO(0, name) +ZEND_END_ARG_INFO(); +IC_METHOD(getPropertyValueEnum) { + zend_long property; + char *name; + size_t name_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ls", &property, &name, &name_len) == FAILURE) { + return; + } + + RETURN_LONG(u_getPropertyValueEnum((UProperty)property, name)); +} +/* }}} */ + +/* {{{ proto int|string IntlChar::foldCase(int|string $char, int $options = IntlChar::FOLD_CASE_DEFAULT) */ +ZEND_BEGIN_ARG_INFO_EX(foldCase_arginfo, 0, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, foldCase) +ZEND_END_ARG_INFO(); +IC_METHOD(foldCase) { + UChar32 cp, ret; + zval *zcp; + 
zend_long options = U_FOLD_CASE_DEFAULT; + + if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &options) == FAILURE) || + (convert_cp(&cp, zcp) == FAILURE)) { + return; + } + + ret = u_foldCase(cp, options); + if (Z_TYPE_P(zcp) == IS_STRING) { + char buffer[5]; + int buffer_len = 0; + U8_APPEND_UNSAFE(buffer, buffer_len, ret); + buffer[buffer_len] = 0; + RETURN_STRINGL(buffer, buffer_len); + } else { + RETURN_LONG(ret); + } +} +/* }}} */ + +/* {{{ proto int IntlChar::digit(int|string $char[, int $radix = 10]) */ +ZEND_BEGIN_ARG_INFO_EX(digit_arginfo, 0, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, codepoint) + ZEND_ARG_INFO(0, radix) +ZEND_END_ARG_INFO(); +IC_METHOD(digit) { + UChar32 cp; + zval *zcp; + zend_long radix = 10; + int ret; + + if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &radix) == FAILURE) || + (convert_cp(&cp, zcp) == FAILURE)) { + return; + } + + ret = u_digit(cp, radix); + if (ret < 0) { + intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR); + intl_error_set_custom_msg(NULL, "Invalid digit", 0); + RETURN_FALSE; + } + RETURN_LONG(ret); +} +/* }}} */ + +/* {{{ proto int IntlChar::forDigit(int $digit[, int $radix = 10]) */ +ZEND_BEGIN_ARG_INFO_EX(forDigit_arginfo, 0, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, digit) + ZEND_ARG_INFO(0, radix) +ZEND_END_ARG_INFO(); +IC_METHOD(forDigit) { + zend_long digit, radix = 10; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll", &digit, &radix) == FAILURE) { + return; + } + + RETURN_LONG(u_forDigit(digit, radix)); +} +/* }}} */ + +/* {{{ proto array IntlChar::charAge(int|string $char) */ +ZEND_BEGIN_ARG_INFO_EX(charAge_arginfo, 0, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, codepoint) +ZEND_END_ARG_INFO(); +IC_METHOD(charAge) { + UChar32 cp; + zval *zcp; + UVersionInfo version; + int i; + + if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || + (convert_cp(&cp, zcp) == FAILURE)) { + return; + } + + u_charAge(cp, version); + array_init(return_value); + for(i = 0; i < 
U_MAX_VERSION_LENGTH; ++i) { + add_next_index_long(return_value, version[i]); + } +} +/* }}} */ + +/* {{{ proto array IntlChar::getUnicodeVersion() */ +ZEND_BEGIN_ARG_INFO_EX(getUnicodeVersion_arginfo, 0, ZEND_RETURN_VALUE, 0) +ZEND_END_ARG_INFO(); +IC_METHOD(getUnicodeVersion) { + UVersionInfo version; + int i; + + + u_getUnicodeVersion(version); + array_init(return_value); + for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) { + add_next_index_long(return_value, version[i]); + } +} +/* }}} */ + +/* {{{ proto string IntlChar::getFC_NFKC_Closure(int|string $char) */ +ZEND_BEGIN_ARG_INFO_EX(getFC_NFKC_Closure_arginfo, 0, ZEND_RETURN_VALUE, 1) + ZEND_ARG_INFO(0, codepoint) +ZEND_END_ARG_INFO(); +IC_METHOD(getFC_NFKC_Closure) { + UChar32 cp; + zval *zcp; + UChar *closure; + char *ret; + int32_t closure_len, ret_len; + UErrorCode error = U_ZERO_ERROR; + + if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || + (convert_cp(&cp, zcp) == FAILURE)) { + return; + } + + closure_len = u_getFC_NFKC_Closure(cp, NULL, 0, &error); + if (closure_len == 0) { + RETURN_EMPTY_STRING(); + } + closure = safe_emalloc(sizeof(UChar), closure_len + 1, 0); + error = U_ZERO_ERROR; + closure_len = u_getFC_NFKC_Closure(cp, closure, closure_len, &error); + if (U_FAILURE(error)) { + efree(closure); + INTL_CHECK_STATUS(error, "Failed getting closure"); + } + + error = U_ZERO_ERROR; + intl_convert_utf16_to_utf8(&ret, &ret_len, closure, closure_len, &error); + efree(closure); + INTL_CHECK_STATUS(error, "Failed converting output to UTF8"); + RETVAL_STRINGL(ret, ret_len); + efree(ret); +} +/* }}} */ + +/* {{{ proto bool IntlChar::<name>(int|string $char) */ +#define IC_BOOL_METHOD_CHAR(name) \ +ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \ + ZEND_ARG_INFO(0, codepoint) \ +ZEND_END_ARG_INFO(); \ +IC_METHOD(name) { \ + UChar32 cp; zval *zcp; \ + if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \ + (convert_cp(&cp, zcp) == FAILURE)) { return; } \ + 
RETURN_BOOL(u_##name(cp)); \ +} +IC_BOOL_METHOD_CHAR(isUAlphabetic) +IC_BOOL_METHOD_CHAR(isULowercase) +IC_BOOL_METHOD_CHAR(isUUppercase) +IC_BOOL_METHOD_CHAR(isUWhiteSpace) +IC_BOOL_METHOD_CHAR(islower) +IC_BOOL_METHOD_CHAR(isupper) +IC_BOOL_METHOD_CHAR(istitle) +IC_BOOL_METHOD_CHAR(isdigit) +IC_BOOL_METHOD_CHAR(isalpha) +IC_BOOL_METHOD_CHAR(isalnum) +IC_BOOL_METHOD_CHAR(isxdigit) +IC_BOOL_METHOD_CHAR(ispunct) +IC_BOOL_METHOD_CHAR(isgraph) +IC_BOOL_METHOD_CHAR(isblank) +IC_BOOL_METHOD_CHAR(isdefined) +IC_BOOL_METHOD_CHAR(isspace) +IC_BOOL_METHOD_CHAR(isJavaSpaceChar) +IC_BOOL_METHOD_CHAR(isWhitespace) +IC_BOOL_METHOD_CHAR(iscntrl) +IC_BOOL_METHOD_CHAR(isISOControl) +IC_BOOL_METHOD_CHAR(isprint) +IC_BOOL_METHOD_CHAR(isbase) +IC_BOOL_METHOD_CHAR(isMirrored) +IC_BOOL_METHOD_CHAR(isIDStart) +IC_BOOL_METHOD_CHAR(isIDPart) +IC_BOOL_METHOD_CHAR(isIDIgnorable) +IC_BOOL_METHOD_CHAR(isJavaIDStart) +IC_BOOL_METHOD_CHAR(isJavaIDPart) +#undef IC_BOOL_METHOD_CHAR /* generator macro is only needed for the list above */ +/* }}} */ + +/* {{{ proto int IntlChar::<name>(int|string $char) */ +#define IC_INT_METHOD_CHAR(name) \ +ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \ + ZEND_ARG_INFO(0, codepoint) \ +ZEND_END_ARG_INFO(); \ +IC_METHOD(name) { \ + UChar32 cp; zval *zcp; \ + if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \ + (convert_cp(&cp, zcp) == FAILURE)) { return; } \ + RETURN_LONG(u_##name(cp)); \ +} +IC_INT_METHOD_CHAR(charDirection) +IC_INT_METHOD_CHAR(charType) +IC_INT_METHOD_CHAR(getCombiningClass) +IC_INT_METHOD_CHAR(charDigitValue) +#undef IC_INT_METHOD_CHAR /* generator macro is only needed for the list above */ +/* }}} */ + +/* {{{ proto int|string IntlChar::<name>(int|string $char) + * Returns a UTF-8 string if the codepoint was passed as a string (IS_STRING zval) + * Returns an int otherwise + */ +#define IC_CHAR_METHOD_CHAR(name) \ +ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \ + ZEND_ARG_INFO(0, codepoint) \ +ZEND_END_ARG_INFO(); \ +IC_METHOD(name) { \ + UChar32 cp, ret; zval *zcp; \ + if 
((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \ + (convert_cp(&cp, zcp) == FAILURE)) { return; } \ + ret = u_##name(cp); \ + if (Z_TYPE_P(zcp) == IS_STRING) { /* string in, string out */ \ + char buffer[5]; /* holds the UTF-8 bytes appended below plus the terminating 0 */ \ + int buffer_len = 0; \ + U8_APPEND_UNSAFE(buffer, buffer_len, ret); \ + buffer[buffer_len] = 0; \ + RETURN_STRINGL(buffer, buffer_len); \ + } else { \ + RETURN_LONG(ret); \ + } \ +} +IC_CHAR_METHOD_CHAR(charMirror) +IC_CHAR_METHOD_CHAR(tolower) +IC_CHAR_METHOD_CHAR(toupper) +IC_CHAR_METHOD_CHAR(totitle) +#if U_ICU_VERSION_MAJOR_NUM >= 52 +IC_CHAR_METHOD_CHAR(getBidiPairedBracket) +#endif /* ICU >= 52 */ +#undef IC_CHAR_METHOD_CHAR /* generator macro is only needed for the list above */ +/* }}} */ + +static zend_function_entry intlchar_methods[] = { /* every entry is registered as a public static method of IntlChar via IC_ME() */ +#define IC_ME(mname) PHP_ME(IntlChar, mname, mname##_arginfo, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) + IC_ME(chr) + IC_ME(ord) + IC_ME(hasBinaryProperty) + IC_ME(isUAlphabetic) + IC_ME(isULowercase) + IC_ME(isUUppercase) + IC_ME(isUWhiteSpace) + IC_ME(getIntPropertyValue) + IC_ME(getIntPropertyMinValue) + IC_ME(getIntPropertyMaxValue) + IC_ME(getNumericValue) + IC_ME(islower) + IC_ME(isupper) + IC_ME(istitle) + IC_ME(isdigit) + IC_ME(isalpha) + IC_ME(isalnum) + IC_ME(isxdigit) + IC_ME(ispunct) + IC_ME(isgraph) + IC_ME(isblank) + IC_ME(isdefined) + IC_ME(isspace) + IC_ME(isJavaSpaceChar) + IC_ME(isWhitespace) + IC_ME(iscntrl) + IC_ME(isISOControl) + IC_ME(isprint) + IC_ME(isbase) + IC_ME(charDirection) + IC_ME(isMirrored) + IC_ME(charMirror) +#if U_ICU_VERSION_MAJOR_NUM >= 52 + IC_ME(getBidiPairedBracket) +#endif /* ICU >= 52 */ + IC_ME(charType) + IC_ME(enumCharTypes) + IC_ME(getCombiningClass) + IC_ME(charDigitValue) + IC_ME(getBlockCode) + IC_ME(charName) + IC_ME(charFromName) + IC_ME(enumCharNames) + IC_ME(getPropertyName) + IC_ME(getPropertyEnum) + IC_ME(getPropertyValueName) + IC_ME(getPropertyValueEnum) + IC_ME(isIDStart) + IC_ME(isIDPart) + IC_ME(isIDIgnorable) + IC_ME(isJavaIDStart) + IC_ME(isJavaIDPart) + IC_ME(tolower) + IC_ME(toupper) + IC_ME(totitle) + IC_ME(foldCase) 
+ IC_ME(digit) + IC_ME(forDigit) + IC_ME(charAge) + IC_ME(getUnicodeVersion) + IC_ME(getFC_NFKC_Closure) +#undef IC_ME + PHP_FE_END +}; + +int php_uchar_minit(INIT_FUNC_ARGS) { /* registers the IntlChar class and declares its class constants; always returns SUCCESS */ + zend_class_entry tmp, *ce; + + INIT_CLASS_ENTRY(tmp, "IntlChar", intlchar_methods); + ce = zend_register_internal_class(&tmp); + +#define IC_CONSTL(name, val) \ + zend_declare_class_constant_long(ce, name, strlen(name), val); + + zend_declare_class_constant_string(ce, "UNICODE_VERSION", sizeof("UNICODE_VERSION")-1, U_UNICODE_VERSION); + IC_CONSTL("CODEPOINT_MIN", UCHAR_MIN_VALUE) + IC_CONSTL("CODEPOINT_MAX", UCHAR_MAX_VALUE) + + /* All enums used by the uchar APIs. There are a LOT of them, + * so they're separated out into include files, + * leaving this source file for actual implementation. + */ +#define UPROPERTY(name) IC_CONSTL("PROPERTY_" #name, UCHAR_##name) +#include "uproperty-enum.h" +#undef UPROPERTY + +#define UCHARCATEGORY(name) IC_CONSTL("CHAR_CATEGORY_" #name, U_##name) +#include "ucharcategory-enum.h" +#undef UCHARCATEGORY + +#define UCHARDIRECTION(name) IC_CONSTL("CHAR_DIRECTION_" #name, U_##name) +#include "uchardirection-enum.h" +#undef UCHARDIRECTION + +#define UBLOCKCODE(name) IC_CONSTL("BLOCK_CODE_" #name, UBLOCK_##name) +#include "ublockcode-enum.h" +#undef UBLOCKCODE + + /* Smaller, self-describing enums */ +#define UOTHER(name) IC_CONSTL(#name, U_##name) +#include "uother-enum.h" +#undef UOTHER + +#undef IC_CONSTL +#undef IC_CONSTS + + return SUCCESS; +} + diff --git a/ext/intl/uchar/uchar.h b/ext/intl/uchar/uchar.h new file mode 100644 index 0000000000..25bab1f215 --- /dev/null +++ b/ext/intl/uchar/uchar.h @@ -0,0 +1,8 @@ +#ifndef incl_PHP_INTL_UCHAR_H +#define incl_PHP_INTL_UCHAR_H + +#include "php.h" + +int php_uchar_minit(INIT_FUNC_ARGS); + +#endif /* incl_PHP_INTL_UCHAR_H */ diff --git a/ext/intl/uchar/ucharcategory-enum.h b/ext/intl/uchar/ucharcategory-enum.h new file mode 100644 index 0000000000..2db8a2f860 --- /dev/null +++ b/ext/intl/uchar/ucharcategory-enum.h 
@@ -0,0 +1,33 @@ +/* Generated from http://icu-project.org/apiref/icu4c/uchar_8h.html#a6a2dbc531efce8d77fdb4c314e7fc25e */ +UCHARCATEGORY(UNASSIGNED) +UCHARCATEGORY(GENERAL_OTHER_TYPES) +UCHARCATEGORY(UPPERCASE_LETTER) +UCHARCATEGORY(LOWERCASE_LETTER) +UCHARCATEGORY(TITLECASE_LETTER) +UCHARCATEGORY(MODIFIER_LETTER) +UCHARCATEGORY(OTHER_LETTER) +UCHARCATEGORY(NON_SPACING_MARK) +UCHARCATEGORY(ENCLOSING_MARK) +UCHARCATEGORY(COMBINING_SPACING_MARK) +UCHARCATEGORY(DECIMAL_DIGIT_NUMBER) +UCHARCATEGORY(LETTER_NUMBER) +UCHARCATEGORY(OTHER_NUMBER) +UCHARCATEGORY(SPACE_SEPARATOR) +UCHARCATEGORY(LINE_SEPARATOR) +UCHARCATEGORY(PARAGRAPH_SEPARATOR) +UCHARCATEGORY(CONTROL_CHAR) +UCHARCATEGORY(FORMAT_CHAR) +UCHARCATEGORY(PRIVATE_USE_CHAR) +UCHARCATEGORY(SURROGATE) +UCHARCATEGORY(DASH_PUNCTUATION) +UCHARCATEGORY(START_PUNCTUATION) +UCHARCATEGORY(END_PUNCTUATION) +UCHARCATEGORY(CONNECTOR_PUNCTUATION) +UCHARCATEGORY(OTHER_PUNCTUATION) +UCHARCATEGORY(MATH_SYMBOL) +UCHARCATEGORY(CURRENCY_SYMBOL) +UCHARCATEGORY(MODIFIER_SYMBOL) +UCHARCATEGORY(OTHER_SYMBOL) +UCHARCATEGORY(INITIAL_PUNCTUATION) +UCHARCATEGORY(FINAL_PUNCTUATION) +UCHARCATEGORY(CHAR_CATEGORY_COUNT) diff --git a/ext/intl/uchar/uchardirection-enum.h b/ext/intl/uchar/uchardirection-enum.h new file mode 100644 index 0000000000..afbbdb28cb --- /dev/null +++ b/ext/intl/uchar/uchardirection-enum.h @@ -0,0 +1,26 @@ +UCHARDIRECTION(LEFT_TO_RIGHT) +UCHARDIRECTION(RIGHT_TO_LEFT) +UCHARDIRECTION(EUROPEAN_NUMBER) +UCHARDIRECTION(EUROPEAN_NUMBER_SEPARATOR) +UCHARDIRECTION(EUROPEAN_NUMBER_TERMINATOR) +UCHARDIRECTION(ARABIC_NUMBER) +UCHARDIRECTION(COMMON_NUMBER_SEPARATOR) +UCHARDIRECTION(BLOCK_SEPARATOR) +UCHARDIRECTION(SEGMENT_SEPARATOR) +UCHARDIRECTION(WHITE_SPACE_NEUTRAL) +UCHARDIRECTION(OTHER_NEUTRAL) +UCHARDIRECTION(LEFT_TO_RIGHT_EMBEDDING) +UCHARDIRECTION(LEFT_TO_RIGHT_OVERRIDE) +UCHARDIRECTION(RIGHT_TO_LEFT_ARABIC) +UCHARDIRECTION(RIGHT_TO_LEFT_EMBEDDING) +UCHARDIRECTION(RIGHT_TO_LEFT_OVERRIDE) +UCHARDIRECTION(POP_DIRECTIONAL_FORMAT) 
+UCHARDIRECTION(DIR_NON_SPACING_MARK) +UCHARDIRECTION(BOUNDARY_NEUTRAL) +#if U_ICU_VERSION_MAJOR_NUM >= 52 +UCHARDIRECTION(FIRST_STRONG_ISOLATE) +UCHARDIRECTION(LEFT_TO_RIGHT_ISOLATE) +UCHARDIRECTION(RIGHT_TO_LEFT_ISOLATE) +UCHARDIRECTION(POP_DIRECTIONAL_ISOLATE) +#endif /* ICU >= 52 */ +UCHARDIRECTION(CHAR_DIRECTION_COUNT) diff --git a/ext/intl/uchar/uother-enum.h b/ext/intl/uchar/uother-enum.h new file mode 100644 index 0000000000..babcf5cb16 --- /dev/null +++ b/ext/intl/uchar/uother-enum.h @@ -0,0 +1,284 @@ +/* UBidiPairedBracketType - http://icu-project.org/apiref/icu4c/uchar_8h.html#af954219aa1df452657ec355221c6703d */ +#if U_ICU_VERSION_MAJOR_NUM >= 52 +UOTHER(BPT_NONE) +UOTHER(BPT_OPEN) +UOTHER(BPT_CLOSE) +UOTHER(BPT_COUNT) +#endif /* ICU >= 52 */ + +/* UEastAsianWidth - http://icu-project.org/apiref/icu4c/uchar_8h.html#a95cc2ca2f9cfd6d0c63eee2c65951333 */ +UOTHER(EA_NEUTRAL) +UOTHER(EA_AMBIGUOUS) +UOTHER(EA_HALFWIDTH) +UOTHER(EA_FULLWIDTH ) +UOTHER(EA_NARROW) +UOTHER(EA_WIDE) +UOTHER(EA_COUNT) + +/* UCharNameChoice - http://icu-project.org/apiref/icu4c/uchar_8h.html#a2ba37edcca62eff48226e8096035addf */ +UOTHER(UNICODE_CHAR_NAME) +UOTHER(UNICODE_10_CHAR_NAME) +UOTHER(EXTENDED_CHAR_NAME) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44 +UOTHER(CHAR_NAME_ALIAS) +#endif /* ICU >= 4.4 */ +UOTHER(CHAR_NAME_CHOICE_COUNT) + +/* UPropertyNameChoice - http://icu-project.org/apiref/icu4c/uchar_8h.html#a5056494c7d5a2c7185f3c464f48fe5d1 */ +UOTHER(SHORT_PROPERTY_NAME) +UOTHER(LONG_PROPERTY_NAME) +UOTHER(PROPERTY_NAME_CHOICE_COUNT) + +/* UDecompositionType - http://icu-project.org/apiref/icu4c/uchar_8h.html#ae2c56994fcf28062c7e77beb671533f5 */ +UOTHER(DT_NONE) +UOTHER(DT_CANONICAL) +UOTHER(DT_COMPAT) +UOTHER(DT_CIRCLE) +UOTHER(DT_FINAL) +UOTHER(DT_FONT) +UOTHER(DT_FRACTION) +UOTHER(DT_INITIAL) +UOTHER(DT_ISOLATED) +UOTHER(DT_MEDIAL) +UOTHER(DT_NARROW) +UOTHER(DT_NOBREAK) +UOTHER(DT_SMALL) +UOTHER(DT_SQUARE) +UOTHER(DT_SUB) +UOTHER(DT_SUPER) 
+UOTHER(DT_VERTICAL) +UOTHER(DT_WIDE) +UOTHER(DT_COUNT ) + +/* UJoiningType - http://icu-project.org/apiref/icu4c/uchar_8h.html#a3ce1ce20e7f3b8534eb3490ad3aba3dd */ +UOTHER(JT_NON_JOINING) +UOTHER(JT_JOIN_CAUSING) +UOTHER(JT_DUAL_JOINING) +UOTHER(JT_LEFT_JOINING) +UOTHER(JT_RIGHT_JOINING) +UOTHER(JT_TRANSPARENT) +UOTHER(JT_COUNT) + +/* UJoiningGroup - http://icu-project.org/apiref/icu4c/uchar_8h.html#a7887844ec0872e6e9a283e0825fcae65 */ +UOTHER(JG_NO_JOINING_GROUP) +UOTHER(JG_AIN) +UOTHER(JG_ALAPH) +UOTHER(JG_ALEF) +UOTHER(JG_BEH) +UOTHER(JG_BETH) +UOTHER(JG_DAL) +UOTHER(JG_DALATH_RISH) +UOTHER(JG_E) +UOTHER(JG_FEH) +UOTHER(JG_FINAL_SEMKATH) +UOTHER(JG_GAF) +UOTHER(JG_GAMAL) +UOTHER(JG_HAH) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 46 +UOTHER(JG_TEH_MARBUTA_GOAL) +#endif /* ICU >= 4.6 */ +UOTHER(JG_HAMZA_ON_HEH_GOAL) +UOTHER(JG_HE) +UOTHER(JG_HEH) +UOTHER(JG_HEH_GOAL) +UOTHER(JG_HETH) +UOTHER(JG_KAF) +UOTHER(JG_KAPH) +UOTHER(JG_KNOTTED_HEH) +UOTHER(JG_LAM) +UOTHER(JG_LAMADH) +UOTHER(JG_MEEM) +UOTHER(JG_MIM) +UOTHER(JG_NOON) +UOTHER(JG_NUN) +UOTHER(JG_PE) +UOTHER(JG_QAF) +UOTHER(JG_QAPH) +UOTHER(JG_REH) +UOTHER(JG_REVERSED_PE) +UOTHER(JG_SAD) +UOTHER(JG_SADHE) +UOTHER(JG_SEEN) +UOTHER(JG_SEMKATH) +UOTHER(JG_SHIN) +UOTHER(JG_SWASH_KAF) +UOTHER(JG_SYRIAC_WAW) +UOTHER(JG_TAH) +UOTHER(JG_TAW) +UOTHER(JG_TEH_MARBUTA) +UOTHER(JG_TETH) +UOTHER(JG_WAW) +UOTHER(JG_YEH) +UOTHER(JG_YEH_BARREE) +UOTHER(JG_YEH_WITH_TAIL) +UOTHER(JG_YUDH) +UOTHER(JG_YUDH_HE) +UOTHER(JG_ZAIN) +UOTHER(JG_FE) +UOTHER(JG_KHAPH) +UOTHER(JG_ZHAIN) +UOTHER(JG_BURUSHASKI_YEH_BARREE) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44 +UOTHER(JG_FARSI_YEH) +UOTHER(JG_NYA) +#endif /* ICU >= 4.4 */ +#if U_ICU_VERSION_MAJOR_NUM >= 49 +UOTHER(JG_ROHINGYA_YEH) +#endif +#if U_ICU_VERSION_MAJOR_NUM >= 54 +UOTHER(JG_MANICHAEAN_ALEPH) +UOTHER(JG_MANICHAEAN_AYIN) +UOTHER(JG_MANICHAEAN_BETH) +UOTHER(JG_MANICHAEAN_DALETH) +UOTHER(JG_MANICHAEAN_DHAMEDH) +UOTHER(JG_MANICHAEAN_FIVE) 
+UOTHER(JG_MANICHAEAN_GIMEL) +UOTHER(JG_MANICHAEAN_HETH) +UOTHER(JG_MANICHAEAN_HUNDRED) +UOTHER(JG_MANICHAEAN_KAPH) +UOTHER(JG_MANICHAEAN_LAMEDH) +UOTHER(JG_MANICHAEAN_MEM) +UOTHER(JG_MANICHAEAN_NUN) +UOTHER(JG_MANICHAEAN_ONE) +UOTHER(JG_MANICHAEAN_PE) +UOTHER(JG_MANICHAEAN_QOPH) +UOTHER(JG_MANICHAEAN_RESH) +UOTHER(JG_MANICHAEAN_SADHE) +UOTHER(JG_MANICHAEAN_SAMEKH) +UOTHER(JG_MANICHAEAN_TAW) +UOTHER(JG_MANICHAEAN_TEN) +UOTHER(JG_MANICHAEAN_TETH) +UOTHER(JG_MANICHAEAN_THAMEDH) +UOTHER(JG_MANICHAEAN_TWENTY) +UOTHER(JG_MANICHAEAN_WAW) +UOTHER(JG_MANICHAEAN_YODH) +UOTHER(JG_MANICHAEAN_ZAYIN) +UOTHER(JG_STRAIGHT_WAW) +#endif /* ICU 54 */ +UOTHER(JG_COUNT ) + +/* UGraphemeClusterBreak - http://icu-project.org/apiref/icu4c/uchar_8h.html#abb9bae7d2a1c80ce342be4647661fde1 */ +UOTHER(GCB_OTHER) +UOTHER(GCB_CONTROL) +UOTHER(GCB_CR) +UOTHER(GCB_EXTEND) +UOTHER(GCB_L) +UOTHER(GCB_LF) +UOTHER(GCB_LV) +UOTHER(GCB_LVT) +UOTHER(GCB_T) +UOTHER(GCB_V) +UOTHER(GCB_SPACING_MARK) +UOTHER(GCB_PREPEND) +#if U_ICU_VERSION_MAJOR_NUM >= 50 +UOTHER(GCB_REGIONAL_INDICATOR) +#endif /* ICU 50 */ +UOTHER(GCB_COUNT) + +/* UWordBreakValues - http://icu-project.org/apiref/icu4c/uchar_8h.html#af70ee907368e663f8dd4b90c7196e15c */ +UOTHER(WB_OTHER) +UOTHER(WB_ALETTER) +UOTHER(WB_FORMAT) +UOTHER(WB_KATAKANA) +UOTHER(WB_MIDLETTER) +UOTHER(WB_MIDNUM) +UOTHER(WB_NUMERIC) +UOTHER(WB_EXTENDNUMLET) +UOTHER(WB_CR) +UOTHER(WB_EXTEND) +UOTHER(WB_LF) +UOTHER(WB_MIDNUMLET) +UOTHER(WB_NEWLINE) +#if U_ICU_VERSION_MAJOR_NUM >= 50 +UOTHER(WB_REGIONAL_INDICATOR) +#endif /* ICU >= 50 */ +#if U_ICU_VERSION_MAJOR_NUM >= 52 +UOTHER(WB_HEBREW_LETTER) +UOTHER(WB_SINGLE_QUOTE) +UOTHER(WB_DOUBLE_QUOTE) +#endif /* ICU >= 52 */ +UOTHER(WB_COUNT) + +/* USentenceBreak - http://icu-project.org/apiref/icu4c/uchar_8h.html#a89e9e463c3bae1d2d46b1dbb6f90de0f */ +UOTHER(SB_OTHER) +UOTHER(SB_ATERM) +UOTHER(SB_CLOSE) +UOTHER(SB_FORMAT) +UOTHER(SB_LOWER) +UOTHER(SB_NUMERIC) +UOTHER(SB_OLETTER) +UOTHER(SB_SEP) +UOTHER(SB_SP) 
+UOTHER(SB_STERM) +UOTHER(SB_UPPER) +UOTHER(SB_CR) +UOTHER(SB_EXTEND) +UOTHER(SB_LF) +UOTHER(SB_SCONTINUE) +UOTHER(SB_COUNT) + +/* ULineBreak - http://icu-project.org/apiref/icu4c/uchar_8h.html#a5d1abdf05be22cb9599f804a8506277c */ +UOTHER(LB_UNKNOWN) +UOTHER(LB_AMBIGUOUS) +UOTHER(LB_ALPHABETIC) +UOTHER(LB_BREAK_BOTH) +UOTHER(LB_BREAK_AFTER) +UOTHER(LB_BREAK_BEFORE) +UOTHER(LB_MANDATORY_BREAK) +UOTHER(LB_CONTINGENT_BREAK) +UOTHER(LB_CLOSE_PUNCTUATION) +UOTHER(LB_COMBINING_MARK) +UOTHER(LB_CARRIAGE_RETURN) +UOTHER(LB_EXCLAMATION) +UOTHER(LB_GLUE) +UOTHER(LB_HYPHEN) +UOTHER(LB_IDEOGRAPHIC) +UOTHER(LB_INSEPARABLE) +UOTHER(LB_INSEPERABLE) +UOTHER(LB_INFIX_NUMERIC) +UOTHER(LB_LINE_FEED) +UOTHER(LB_NONSTARTER) +UOTHER(LB_NUMERIC) +UOTHER(LB_OPEN_PUNCTUATION) +UOTHER(LB_POSTFIX_NUMERIC) +UOTHER(LB_PREFIX_NUMERIC) +UOTHER(LB_QUOTATION) +UOTHER(LB_COMPLEX_CONTEXT) +UOTHER(LB_SURROGATE) +UOTHER(LB_SPACE) +UOTHER(LB_BREAK_SYMBOLS) +UOTHER(LB_ZWSPACE) +UOTHER(LB_NEXT_LINE) +UOTHER(LB_WORD_JOINER) +UOTHER(LB_H2) +UOTHER(LB_H3) +UOTHER(LB_JL) +UOTHER(LB_JT) +UOTHER(LB_JV) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44 +UOTHER(LB_CLOSE_PARENTHESIS) +#endif /* ICU >= 4.4 */ +#if U_ICU_VERSION_MAJOR_NUM >= 49 +UOTHER(LB_CONDITIONAL_JAPANESE_STARTER) +UOTHER(LB_HEBREW_LETTER) +#endif /* ICU >= 49 */ +#if U_ICU_VERSION_MAJOR_NUM >= 50 +UOTHER(LB_REGIONAL_INDICATOR) +#endif /* ICU >= 50 */ +UOTHER(LB_COUNT) + +/* UNumericType - http://icu-project.org/apiref/icu4c/uchar_8h.html#adec3e7a6ae3a00274c019b3b2ddaecbe */ +UOTHER(NT_NONE) +UOTHER(NT_DECIMAL) +UOTHER(NT_DIGIT) +UOTHER(NT_NUMERIC) +UOTHER(NT_COUNT) + +/* UHangulSyllableType - http://icu-project.org/apiref/icu4c/uchar_8h.html#a7cb09027c37ad73571cf541caf002c8f */ +UOTHER(HST_NOT_APPLICABLE) +UOTHER(HST_LEADING_JAMO) +UOTHER(HST_VOWEL_JAMO) +UOTHER(HST_TRAILING_JAMO) +UOTHER(HST_LV_SYLLABLE) +UOTHER(HST_LVT_SYLLABLE) +UOTHER(HST_COUNT ) diff --git a/ext/intl/uchar/uproperty-enum.h b/ext/intl/uchar/uproperty-enum.h 
new file mode 100644 index 0000000000..3b7b80f365 --- /dev/null +++ b/ext/intl/uchar/uproperty-enum.h @@ -0,0 +1,118 @@ +/* Generated from http://icu-project.org/apiref/icu4c/uchar_8h.html#ae40d616419e74ecc7c80a9febab03199 */ +UPROPERTY(ALPHABETIC) +UPROPERTY(BINARY_START) +UPROPERTY(ASCII_HEX_DIGIT) +UPROPERTY(BIDI_CONTROL) +UPROPERTY(BIDI_MIRRORED) +UPROPERTY(DASH) +UPROPERTY(DEFAULT_IGNORABLE_CODE_POINT) +UPROPERTY(DEPRECATED) +UPROPERTY(DIACRITIC) +UPROPERTY(EXTENDER) +UPROPERTY(FULL_COMPOSITION_EXCLUSION) +UPROPERTY(GRAPHEME_BASE) +UPROPERTY(GRAPHEME_EXTEND) +UPROPERTY(GRAPHEME_LINK) +UPROPERTY(HEX_DIGIT) +UPROPERTY(HYPHEN) +UPROPERTY(ID_CONTINUE) +UPROPERTY(ID_START) +UPROPERTY(IDEOGRAPHIC) +UPROPERTY(IDS_BINARY_OPERATOR) +UPROPERTY(IDS_TRINARY_OPERATOR) +UPROPERTY(JOIN_CONTROL) +UPROPERTY(LOGICAL_ORDER_EXCEPTION) +UPROPERTY(LOWERCASE) +UPROPERTY(MATH) +UPROPERTY(NONCHARACTER_CODE_POINT) +UPROPERTY(QUOTATION_MARK) +UPROPERTY(RADICAL) +UPROPERTY(SOFT_DOTTED) +UPROPERTY(TERMINAL_PUNCTUATION) +UPROPERTY(UNIFIED_IDEOGRAPH) +UPROPERTY(UPPERCASE) +UPROPERTY(WHITE_SPACE) +UPROPERTY(XID_CONTINUE) +UPROPERTY(XID_START) +UPROPERTY(CASE_SENSITIVE) +UPROPERTY(S_TERM) +UPROPERTY(VARIATION_SELECTOR) +UPROPERTY(NFD_INERT) +UPROPERTY(NFKD_INERT) +UPROPERTY(NFC_INERT) +UPROPERTY(NFKC_INERT) +UPROPERTY(SEGMENT_STARTER) +UPROPERTY(PATTERN_SYNTAX) +UPROPERTY(PATTERN_WHITE_SPACE) +UPROPERTY(POSIX_ALNUM) +UPROPERTY(POSIX_BLANK) +UPROPERTY(POSIX_GRAPH) +UPROPERTY(POSIX_PRINT) +UPROPERTY(POSIX_XDIGIT) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 44 +UPROPERTY(CASED) +UPROPERTY(CASE_IGNORABLE) +UPROPERTY(CHANGES_WHEN_LOWERCASED) +UPROPERTY(CHANGES_WHEN_UPPERCASED) +UPROPERTY(CHANGES_WHEN_TITLECASED) +UPROPERTY(CHANGES_WHEN_CASEFOLDED) +UPROPERTY(CHANGES_WHEN_CASEMAPPED) +UPROPERTY(CHANGES_WHEN_NFKC_CASEFOLDED) +#endif /* ICU >= 4.4 */ +UPROPERTY(BINARY_LIMIT) +UPROPERTY(BIDI_CLASS) +UPROPERTY(INT_START) +UPROPERTY(BLOCK) +UPROPERTY(CANONICAL_COMBINING_CLASS) 
+UPROPERTY(DECOMPOSITION_TYPE) +UPROPERTY(EAST_ASIAN_WIDTH) +UPROPERTY(GENERAL_CATEGORY) +UPROPERTY(JOINING_GROUP) +UPROPERTY(JOINING_TYPE) +UPROPERTY(LINE_BREAK) +UPROPERTY(NUMERIC_TYPE) +UPROPERTY(SCRIPT) +UPROPERTY(HANGUL_SYLLABLE_TYPE) +UPROPERTY(NFD_QUICK_CHECK) +UPROPERTY(NFKD_QUICK_CHECK) +UPROPERTY(NFC_QUICK_CHECK) +UPROPERTY(NFKC_QUICK_CHECK) +UPROPERTY(LEAD_CANONICAL_COMBINING_CLASS) +UPROPERTY(TRAIL_CANONICAL_COMBINING_CLASS) +UPROPERTY(GRAPHEME_CLUSTER_BREAK) +UPROPERTY(SENTENCE_BREAK) +UPROPERTY(WORD_BREAK) +#if U_ICU_VERSION_MAJOR_NUM >= 52 +UPROPERTY(BIDI_PAIRED_BRACKET_TYPE) +#endif /* ICU >= 52 */ +UPROPERTY(INT_LIMIT) +UPROPERTY(GENERAL_CATEGORY_MASK) +UPROPERTY(MASK_START) +UPROPERTY(MASK_LIMIT) +UPROPERTY(NUMERIC_VALUE) +UPROPERTY(DOUBLE_START) +UPROPERTY(DOUBLE_LIMIT) +UPROPERTY(AGE) +UPROPERTY(STRING_START) +UPROPERTY(BIDI_MIRRORING_GLYPH) +UPROPERTY(CASE_FOLDING) +UPROPERTY(ISO_COMMENT) +UPROPERTY(LOWERCASE_MAPPING) +UPROPERTY(NAME) +UPROPERTY(SIMPLE_CASE_FOLDING) +UPROPERTY(SIMPLE_LOWERCASE_MAPPING) +UPROPERTY(SIMPLE_TITLECASE_MAPPING) +UPROPERTY(SIMPLE_UPPERCASE_MAPPING) +UPROPERTY(TITLECASE_MAPPING) +UPROPERTY(UNICODE_1_NAME) +UPROPERTY(UPPERCASE_MAPPING) +#if U_ICU_VERSION_MAJOR_NUM >= 52 +UPROPERTY(BIDI_PAIRED_BRACKET) +#endif /* ICU >= 52 */ +UPROPERTY(STRING_LIMIT) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 46 +UPROPERTY(SCRIPT_EXTENSIONS) +UPROPERTY(OTHER_PROPERTY_START) +UPROPERTY(OTHER_PROPERTY_LIMIT) +#endif /* ICU >= 4.6 */ +UPROPERTY(INVALID_CODE) |