diff options
author | Paul Crovella <paul.crovella@gmail.com> | 2018-03-20 17:33:23 -0700 |
---|---|---|
committer | Joe Watkins <krakjoe@php.net> | 2018-03-22 23:27:39 +0100 |
commit | b873d3c288e47822f3adc3f4825f18ca74554a9f (patch) | |
tree | d1c5fddb31ad1833921575166d0e529d0455ee04 | |
parent | a795bd82651e61d05b96a3efca2042308a3af451 (diff) | |
download | php-git-b873d3c288e47822f3adc3f4825f18ca74554a9f.tar.gz |
Add normalizer_get_raw_decomposition function
Implements #76111 https://bugs.php.net/bug.php?id=76111
-rw-r--r-- | ext/intl/normalizer/normalizer_class.c | 5 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_normalize.c | 75 | ||||
-rw-r--r-- | ext/intl/normalizer/normalizer_normalize.h | 1 | ||||
-rw-r--r-- | ext/intl/php_intl.c | 5 | ||||
-rw-r--r-- | ext/intl/tests/normalizer_get_raw_decomposition.phpt | 67 | ||||
-rw-r--r-- | ext/intl/tests/ut_common.inc | 4 |
6 files changed, 157 insertions, 0 deletions
diff --git a/ext/intl/normalizer/normalizer_class.c b/ext/intl/normalizer/normalizer_class.c
index 87b274ebfc..95b738ef07 100644
--- a/ext/intl/normalizer/normalizer_class.c
+++ b/ext/intl/normalizer/normalizer_class.c
@@ -34,6 +34,10 @@ ZEND_BEGIN_ARG_INFO_EX( normalizer_args, 0, 0, 1 )
 	ZEND_ARG_INFO( 0, form )
 ZEND_END_ARG_INFO()
 
+ZEND_BEGIN_ARG_INFO_EX( decomposition_args, 0, 0, 1 )
+	ZEND_ARG_INFO( 0, input )
+ZEND_END_ARG_INFO()
+
 /* }}} */
 
 /* {{{ Normalizer_class_functions
@@ -43,6 +47,7 @@ ZEND_END_ARG_INFO()
 static const zend_function_entry Normalizer_class_functions[] = {
 	ZEND_FENTRY( normalize, ZEND_FN( normalizer_normalize ), normalizer_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC )
 	ZEND_FENTRY( isNormalized, ZEND_FN( normalizer_is_normalized ), normalizer_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC )
+	ZEND_FENTRY( getRawDecomposition, ZEND_FN( normalizer_get_raw_decomposition ), decomposition_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC )
 	PHP_FE_END
 };
 /* }}} */
diff --git a/ext/intl/normalizer/normalizer_normalize.c b/ext/intl/normalizer/normalizer_normalize.c
index 52780acdbc..ad031de7d4 100644
--- a/ext/intl/normalizer/normalizer_normalize.c
+++ b/ext/intl/normalizer/normalizer_normalize.c
@@ -247,6 +247,81 @@ PHP_FUNCTION( normalizer_is_normalized )
 }
 /* }}} */
 
+/* {{{ proto string|null Normalizer::getRawDecomposition( string $input )
+ * Returns the Decomposition_Mapping property for the given UTF-8 encoded code point. }}} */
+/* {{{ proto string|null normalizer_get_raw_decomposition( string $input )
+ * Returns the Decomposition_Mapping property for the given UTF-8 encoded code point.
+ *
+ * $input must contain exactly one UTF-8 encoded code point. NULL is returned both
+ * when the code point has no decomposition mapping and when an error occurs; in
+ * the error case the global intl error code and message are set as well.
+ */
+PHP_FUNCTION( normalizer_get_raw_decomposition )
+{
+	char* input = NULL;
+	size_t input_length = 0;
+
+	UChar32 codepoint = -1;
+	int32_t offset = 0;
+
+	UErrorCode status = U_ZERO_ERROR;
+	const UNormalizer2 *nfkc;
+	UChar decomposition[32];
+	int32_t decomposition_length;
+	zend_string *result;
+
+	intl_error_reset(NULL);
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_length) == FAILURE)) {
+		return;
+	}
+
+	/* Decode one code point; offset then holds the number of bytes consumed. */
+	U8_NEXT(input, offset, input_length, codepoint);
+	if ((size_t)offset != input_length) {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Input string must be exactly one UTF-8 encoded code point long.", 0);
+		return;
+	}
+
+	/* U8_NEXT reports an ill-formed byte sequence as a negative code point. */
+	if ((codepoint < UCHAR_MIN_VALUE) || (codepoint > UCHAR_MAX_VALUE)) {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Code point out of range", 0);
+		return;
+	}
+
+	nfkc = unorm2_getNFKCInstance(&status);
+	if (U_FAILURE(status)) {
+		intl_error_set_code(NULL, status);
+		intl_error_set_custom_msg(NULL, "Error getting the NFKC normalizer instance", 0);
+		return;
+	}
+
+	decomposition_length = unorm2_getRawDecomposition(nfkc, codepoint, decomposition, 32, &status);
+	if (U_FAILURE(status)) {
+		intl_error_set_code(NULL, status);
+		intl_error_set_custom_msg(NULL, "Error getting the raw decomposition", 0);
+		return;
+	}
+
+	/* ICU returns a negative length when the code point has no decomposition mapping. */
+	if (decomposition_length < 0) {
+		RETURN_NULL();
+	}
+
+	/* Guard against a NULL result (conversion failure) before handing it to RETVAL_NEW_STR. */
+	result = intl_convert_utf16_to_utf8(decomposition, decomposition_length, &status);
+	if (!result) {
+		intl_error_set_code(NULL, status);
+		intl_error_set_custom_msg(NULL, "Error converting the decomposition to UTF-8", 0);
+		return;
+	}
+
+	RETVAL_NEW_STR(result);
+}
+/* }}} */
+
 /*
  * Local variables:
  * tab-width: 4
diff --git a/ext/intl/normalizer/normalizer_normalize.h b/ext/intl/normalizer/normalizer_normalize.h
index c282c56795..f8d0321777 100644
--- a/ext/intl/normalizer/normalizer_normalize.h
+++ b/ext/intl/normalizer/normalizer_normalize.h
@@ -21,5 +21,6 @@
 
 PHP_FUNCTION( normalizer_normalize );
 PHP_FUNCTION( normalizer_is_normalized );
+PHP_FUNCTION( normalizer_get_raw_decomposition );
 
 #endif // NORMALIZER_NORMALIZE_H
diff --git a/ext/intl/php_intl.c b/ext/intl/php_intl.c
index 7b2f206e52..0afc96b081 100644
--- a/ext/intl/php_intl.c
+++ b/ext/intl/php_intl.c
@@ -206,6 +206,10 @@ ZEND_BEGIN_ARG_INFO_EX(normalizer_args, 0, 0, 1)
 	ZEND_ARG_INFO(0, form)
 ZEND_END_ARG_INFO()
 
+ZEND_BEGIN_ARG_INFO_EX(decomposition_args, 0, 0, 1)
+	ZEND_ARG_INFO(0, input)
+ZEND_END_ARG_INFO()
+
 ZEND_BEGIN_ARG_INFO_EX(grapheme_1_arg, 0, 0, 1)
 	ZEND_ARG_INFO(0, string)
 ZEND_END_ARG_INFO()
@@ -662,6 +666,7 @@ static const zend_function_entry intl_functions[] = {
 	/* normalizer functions */
 	PHP_FE( normalizer_normalize, normalizer_args )
 	PHP_FE( normalizer_is_normalized, normalizer_args )
+	PHP_FE( normalizer_get_raw_decomposition, decomposition_args )
 
 	/* Locale functions */
 	PHP_NAMED_FE( locale_get_default, zif_locale_get_default, locale_0_args )
diff --git a/ext/intl/tests/normalizer_get_raw_decomposition.phpt b/ext/intl/tests/normalizer_get_raw_decomposition.phpt
new file mode 100644
index 0000000000..69eb01366b
--- /dev/null
+++ b/ext/intl/tests/normalizer_get_raw_decomposition.phpt
@@ -0,0 +1,67 @@
+--TEST--
+normalizer_get_raw_decomposition()
+--SKIPIF--
+<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
+--FILE--
+<?php
+
+/*
+ * Try getting raw decomposition mappings
+ * with procedural and class methods.
+ */
+
+function ut_main()
+{
+    $result = '';
+    $strings = [
+        "a",
+        "\u{FFDA}",
+        "\u{FDFA}",
+        "",
+        "aa",
+        "\xF5",
+    ];
+
+    foreach ($strings as $string) {
+        $decomposition = ut_norm_get_raw_decomposition($string);
+        $error_code = intl_get_error_code();
+        $error_message = intl_get_error_message();
+
+        $string_hex = bin2hex($string);
+        $result .= "---------------------\n";
+
+        if ($decomposition === null) {
+            $result .= "'$string_hex' has no decomposition mapping\n" ;
+        } else {
+            $result .= "'$string_hex' has the decomposition mapping '" . bin2hex($decomposition) . "'\n" ;
+        }
+        $result .= "error info: '$error_message' ($error_code)\n";
+    }
+
+    return $result;
+}
+
+include_once( 'ut_common.inc' );
+ut_run();
+
+?>
+--EXPECT--
+---------------------
+'61' has no decomposition mapping
+error info: 'U_ZERO_ERROR' (0)
+---------------------
+'efbf9a' has the decomposition mapping 'e385a1'
+error info: 'U_ZERO_ERROR' (0)
+---------------------
+'efb7ba' has the decomposition mapping 'd8b5d984d98920d8a7d984d984d98720d8b9d984d98ad98720d988d8b3d984d985'
+error info: 'U_ZERO_ERROR' (0)
+---------------------
+'' has no decomposition mapping
+error info: 'Input string must be exactly one UTF-8 encoded code point long.: U_ILLEGAL_ARGUMENT_ERROR' (1)
+---------------------
+'6161' has no decomposition mapping
+error info: 'Input string must be exactly one UTF-8 encoded code point long.: U_ILLEGAL_ARGUMENT_ERROR' (1)
+---------------------
+'f5' has no decomposition mapping
+error info: 'Code point out of range: U_ILLEGAL_ARGUMENT_ERROR' (1)
+
diff --git a/ext/intl/tests/ut_common.inc b/ext/intl/tests/ut_common.inc
index 1b82cfa56c..7ca3bc687a 100644
--- a/ext/intl/tests/ut_common.inc
+++ b/ext/intl/tests/ut_common.inc
@@ -219,6 +219,10 @@ function ut_norm_is_normalized( $str, $form )
 {
     return $GLOBALS['oo-mode'] ? Normalizer::isNormalized( $str, $form ) : normalizer_is_normalized( $str, $form );
 }
+function ut_norm_get_raw_decomposition( $str )
+{
+    return $GLOBALS['oo-mode'] ? Normalizer::getRawDecomposition( $str ) : normalizer_get_raw_decomposition( $str );
+}
 
 /*
  * Wrappers around Collator methods to run them in either OO- or procedural mode.