diff options
-rw-r--r-- | UPGRADING | 6 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer.c | 4 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer.h | 4 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_normalize.c | 9 | ||||
-rw-r--r-- | ext/intl/tests/normalizer_normalize_kc_cf.phpt | 107 |
5 files changed, 128 insertions, 2 deletions
@@ -137,7 +137,6 @@ Intl: http://icu-project.org/apiref/icu4c/uspoof_8h.html . Added Normalizer::getRawDecomposition() and normalizer_get_raw_decomposition(), to retrieve the Decomposition_Mapping property of a character. - . Normalizer::NONE is deprecated, when PHP is linked with ICU 56 and above Standard: . Added is_countable() function, to check whether a value may be passed to @@ -170,6 +169,11 @@ JSON: FTP: . Set default transfer mode to binary + Intl: + . Normalizer::NONE is deprecated, when PHP is linked with ICU >= 56 + . Introduced Normalizer::FORM_KC_CF as Normalizer::normalize() argument + for NFKC_Casefold normalization, available when linked with ICU >= 56 + MBString: . The configuration option --with-libmbfl is no longer available. diff --git a/ext/intl/normalizer/normalizer.c b/ext/intl/normalizer/normalizer.c index 3a0d526b77..2f7555388e 100644 --- a/ext/intl/normalizer/normalizer.c +++ b/ext/intl/normalizer/normalizer.c @@ -51,6 +51,10 @@ void normalizer_register_constants( INIT_FUNC_ARGS ) NORMALIZER_EXPOSE_CLASS_CONST( NFC ); NORMALIZER_EXPOSE_CLASS_CONST( FORM_KC ); NORMALIZER_EXPOSE_CLASS_CONST( NFKC ); +#if U_ICU_VERSION_MAJOR_NUM >= 49 + NORMALIZER_EXPOSE_CLASS_CONST( FORM_KC_CF ); + NORMALIZER_EXPOSE_CLASS_CONST( NFKC_CF ); +#endif #undef NORMALIZER_EXPOSE_CUSTOM_CLASS_CONST #undef NORMALIZER_EXPOSE_CLASS_CONST diff --git a/ext/intl/normalizer/normalizer.h b/ext/intl/normalizer/normalizer.h index ddd2c38a94..8150f244f6 100644 --- a/ext/intl/normalizer/normalizer.h +++ b/ext/intl/normalizer/normalizer.h @@ -19,7 +19,7 @@ #include <php.h> #include <unicode/utypes.h> -#if U_ICU_VERSION_MAJOR_NUM < 56 +#if U_ICU_VERSION_MAJOR_NUM < 49 #include <unicode/unorm.h> #define NORMALIZER_NONE UNORM_NONE @@ -44,6 +44,8 @@ #define NORMALIZER_NFC NORMALIZER_FORM_C #define NORMALIZER_FORM_KC 0x20 #define NORMALIZER_NFKC NORMALIZER_FORM_KC +#define NORMALIZER_FORM_KC_CF 0x30 +#define NORMALIZER_NFKC_CF NORMALIZER_FORM_KC_CF #define NORMALIZER_DEFAULT NORMALIZER_FORM_C #endif diff --git a/ext/intl/normalizer/normalizer_normalize.c b/ext/intl/normalizer/normalizer_normalize.c index 8fbe7d40c8..e9431f2372 100644 --- a/ext/intl/normalizer/normalizer_normalize.c +++ b/ext/intl/normalizer/normalizer_normalize.c @@ -51,6 +51,9 @@ static const UNormalizer2 *intl_get_normalizer(zend_long form, UErrorCode *err) case NORMALIZER_FORM_KD: return unorm2_getNFKDInstance(err); break; + case NORMALIZER_FORM_KC_CF: + return unorm2_getNFKCCasefoldInstance(err); + break; } *err = U_ILLEGAL_ARGUMENT_ERROR; @@ -146,6 +149,9 @@ PHP_FUNCTION( normalizer_normalize ) break; case NORMALIZER_FORM_C: case NORMALIZER_FORM_KC: +#if U_ICU_VERSION_MAJOR_NUM >= 56 + case NORMALIZER_FORM_KC_CF: +#endif break; default: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, @@ -279,6 +285,9 @@ PHP_FUNCTION( normalizer_is_normalized ) case NORMALIZER_FORM_KD: case NORMALIZER_FORM_C: case NORMALIZER_FORM_KC: +#if U_ICU_VERSION_MAJOR_NUM >= 56 + case NORMALIZER_FORM_KC_CF: +#endif break; default: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, diff --git a/ext/intl/tests/normalizer_normalize_kc_cf.phpt b/ext/intl/tests/normalizer_normalize_kc_cf.phpt new file mode 100644 index 0000000000..040cd29e0b --- /dev/null +++ b/ext/intl/tests/normalizer_normalize_kc_cf.phpt @@ -0,0 +1,107 @@ +--TEST-- +normalize() NFKC_Casefold +--SKIPIF-- +<?php if (!extension_loaded('intl')) print 'skip'; ?> +<?php if (!defined('Normalizer::FORM_KC_CF')) print 'skip'; ?> +--FILE-- +<?php + +/* + * Try normalization and test normalization + * with Procedural and Object methods. + */ + +function ut_main() +{ + $res_str = ''; + + $forms = array( + Normalizer::FORM_KC_CF, + ); + + $forms_str = array ( + Normalizer::FORM_KC_CF => 'UNORM_FORM_KC_CF', + ); + + /* just make sure all the form constants are defined as in the api spec */ + if (Normalizer::FORM_C != Normalizer::NFC) { + $res_str .= "Invalid normalization form declarations!\n"; + } + + $char_a_diaeresis = "\xC3\xA4"; // 'LATIN SMALL LETTER A WITH DIAERESIS' (U+00E4) + $char_a_ring = "\xC3\xA5"; // 'LATIN SMALL LETTER A WITH RING ABOVE' (U+00E5) + $char_o_diaeresis = "\xC3\xB6"; // 'LATIN SMALL LETTER O WITH DIAERESIS' (U+00F6) + + $char_angstrom_sign = "\xE2\x84\xAB"; // 'ANGSTROM SIGN' (U+212B) + $char_A_ring = "\xC3\x85"; // 'LATIN CAPITAL LETTER A WITH RING ABOVE' (U+00C5) + + $char_ohm_sign = "\xE2\x84\xA6"; // 'OHM SIGN' (U+2126) + $char_omega = "\xCE\xA9"; // 'GREEK CAPITAL LETTER OMEGA' (U+03A9) + + $char_combining_ring_above = "\xCC\x8A"; // 'COMBINING RING ABOVE' (U+030A) + + $char_fi_ligature = "\xEF\xAC\x81"; // 'LATIN SMALL LIGATURE FI' (U+FB01) + + $char_long_s_dot = "\xE1\xBA\x9B"; // 'LATIN SMALL LETTER LONG S WITH DOT ABOVE' (U+1E9B) + + $strs = array( + 'ABC', + 'abc', + $char_a_diaeresis . '||' . $char_a_ring . '||' . $char_o_diaeresis, + $char_angstrom_sign . '||' . $char_A_ring . '||' . 'A' . $char_combining_ring_above, + $char_ohm_sign . '||' . $char_omega, + $char_fi_ligature, + $char_long_s_dot, + ); + + foreach( $forms as $form ) + { + foreach( $strs as $str ) + { + if (Normalizer::NONE == $form) { + /* Hide deprecation warning. */ + $str_norm = @ut_norm_normalize( $str, $form ); + } else { + $str_norm = ut_norm_normalize( $str, $form ); + } + $error_code = intl_get_error_code(); + $error_message = intl_get_error_message(); + + $str_hex = urlencode($str); + $str_norm_hex = urlencode($str_norm); + $res_str .= "'$str_hex' normalized to form '{$forms_str[$form]}' is '$str_norm_hex'" + . "\terror info: '$error_message' ($error_code)\n" + . ""; + + $is_norm = ut_norm_is_normalized( $str, $form ); + $error_code = intl_get_error_code(); + $error_message = intl_get_error_message(); + + $res_str .= " is in form '{$forms_str[$form]}'? = " . ($is_norm ? "yes" : "no") + . "\terror info: '$error_message' ($error_code)\n" + . ""; + } + } + + return $res_str; +} + +include_once( 'ut_common.inc' ); +ut_run(); + +?> +--EXPECT-- +'ABC' normalized to form 'UNORM_FORM_KC_CF' is 'abc' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = no error info: 'U_ZERO_ERROR' (0) +'abc' normalized to form 'UNORM_FORM_KC_CF' is 'abc' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = yes error info: 'U_ZERO_ERROR' (0) +'%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' normalized to form 'UNORM_FORM_KC_CF' is '%C3%A4%7C%7C%C3%A5%7C%7C%C3%B6' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = yes error info: 'U_ZERO_ERROR' (0) +'%E2%84%AB%7C%7C%C3%85%7C%7CA%CC%8A' normalized to form 'UNORM_FORM_KC_CF' is '%C3%A5%7C%7C%C3%A5%7C%7C%C3%A5' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = no error info: 'U_ZERO_ERROR' (0) +'%E2%84%A6%7C%7C%CE%A9' normalized to form 'UNORM_FORM_KC_CF' is '%CF%89%7C%7C%CF%89' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = no error info: 'U_ZERO_ERROR' (0) +'%EF%AC%81' normalized to form 'UNORM_FORM_KC_CF' is 'fi' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = no error info: 'U_ZERO_ERROR' (0) +'%E1%BA%9B' normalized to form 'UNORM_FORM_KC_CF' is '%E1%B9%A1' error info: 'U_ZERO_ERROR' (0) + is in form 'UNORM_FORM_KC_CF'? = no error info: 'U_ZERO_ERROR' (0) |