summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Crovella <paul.crovella@gmail.com>2018-03-20 17:33:23 -0700
committerJoe Watkins <krakjoe@php.net>2018-03-22 23:27:39 +0100
commitb873d3c288e47822f3adc3f4825f18ca74554a9f (patch)
treed1c5fddb31ad1833921575166d0e529d0455ee04
parenta795bd82651e61d05b96a3efca2042308a3af451 (diff)
downloadphp-git-b873d3c288e47822f3adc3f4825f18ca74554a9f.tar.gz
Add normalizer_get_raw_decomposition function
Implements #76111 https://bugs.php.net/bug.php?id=76111
-rw-r--r--ext/intl/normalizer/normalizer_class.c5
-rw-r--r--ext/intl/normalizer/normalizer_normalize.c46
-rw-r--r--ext/intl/normalizer/normalizer_normalize.h1
-rw-r--r--ext/intl/php_intl.c5
-rw-r--r--ext/intl/tests/normalizer_get_raw_decomposition.phpt67
-rw-r--r--ext/intl/tests/ut_common.inc4
6 files changed, 128 insertions, 0 deletions
diff --git a/ext/intl/normalizer/normalizer_class.c b/ext/intl/normalizer/normalizer_class.c
index 87b274ebfc..95b738ef07 100644
--- a/ext/intl/normalizer/normalizer_class.c
+++ b/ext/intl/normalizer/normalizer_class.c
@@ -34,6 +34,10 @@ ZEND_BEGIN_ARG_INFO_EX( normalizer_args, 0, 0, 1 )
ZEND_ARG_INFO( 0, form )
ZEND_END_ARG_INFO()
+/* Argument info for Normalizer::getRawDecomposition(): a single `input` argument. */
+ZEND_BEGIN_ARG_INFO_EX( decomposition_args, 0, 0, 1 )
+	ZEND_ARG_INFO( 0, input )
+ZEND_END_ARG_INFO()
+
/* }}} */
/* {{{ Normalizer_class_functions
@@ -43,6 +47,7 @@ ZEND_END_ARG_INFO()
/* Method table for the Normalizer class: each ZEND_FENTRY names a method, the function implementing it, its arginfo and its access flags (all entries here are public static). */
static const zend_function_entry Normalizer_class_functions[] = {
ZEND_FENTRY( normalize, ZEND_FN( normalizer_normalize ), normalizer_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC )
ZEND_FENTRY( isNormalized, ZEND_FN( normalizer_is_normalized ), normalizer_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC )
+ ZEND_FENTRY( getRawDecomposition, ZEND_FN( normalizer_get_raw_decomposition ), decomposition_args, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC )
PHP_FE_END
};
/* }}} */
diff --git a/ext/intl/normalizer/normalizer_normalize.c b/ext/intl/normalizer/normalizer_normalize.c
index 52780acdbc..ad031de7d4 100644
--- a/ext/intl/normalizer/normalizer_normalize.c
+++ b/ext/intl/normalizer/normalizer_normalize.c
@@ -247,6 +247,52 @@ PHP_FUNCTION( normalizer_is_normalized )
}
/* }}} */
+/* {{{ proto string|null Normalizer::getRawDecomposition( string $input )
+ * Returns the Decomposition_Mapping property for the given UTF-8 encoded code point. }}} */
+/* {{{ proto string|null normalizer_get_raw_decomposition( string $input )
+ * Returns the Decomposition_Mapping property for the given UTF-8 encoded code point.
+ *
+ * $input must contain exactly one UTF-8 encoded code point. The raw
+ * decomposition is returned re-encoded as UTF-8. NULL is returned when the
+ * code point has no decomposition mapping, and also on any failure; in the
+ * failure case the global intl error code and message are set.
+ */
+PHP_FUNCTION( normalizer_get_raw_decomposition )
+{
+	char* input = NULL;
+	size_t input_length = 0;
+
+	UChar32 codepoint = -1;
+	int32_t offset = 0;
+
+	UErrorCode status = U_ZERO_ERROR;
+	const UNormalizer2 *nfkc;
+	UChar decomposition[32];
+	const int32_t decomposition_capacity = (int32_t)(sizeof(decomposition) / sizeof(decomposition[0]));
+	int32_t decomposition_length;
+	zend_string *result;
+
+	intl_error_reset(NULL);
+
+	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_length) == FAILURE)) {
+		return;
+	}
+
+	/* Decode the leading code point; afterwards offset points just past it,
+	 * so any mismatch with input_length means "not exactly one code point". */
+	U8_NEXT(input, offset, input_length, codepoint);
+	if ((size_t)offset != input_length) {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Input string must be exactly one UTF-8 encoded code point long.", 0);
+		return;
+	}
+
+	/* U8_NEXT yields a negative code point for malformed UTF-8. */
+	if ((codepoint < UCHAR_MIN_VALUE) || (codepoint > UCHAR_MAX_VALUE)) {
+		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
+		intl_error_set_custom_msg(NULL, "Code point out of range", 0);
+		return;
+	}
+
+	/* Fetched after input validation so argument errors keep precedence. */
+	nfkc = unorm2_getNFKCInstance(&status);
+	if (U_FAILURE(status)) {
+		intl_error_set_code(NULL, status);
+		intl_error_set_custom_msg(NULL, "Could not obtain the NFKC normalizer instance", 0);
+		return;
+	}
+
+	decomposition_length = unorm2_getRawDecomposition(nfkc, codepoint, decomposition, decomposition_capacity, &status);
+	/* Check status before trusting the length: on overflow the reported
+	 * length may exceed the buffer and must not be used to read from it. */
+	if (U_FAILURE(status)) {
+		intl_error_set_code(NULL, status);
+		intl_error_set_custom_msg(NULL, "Error retrieving the raw decomposition", 0);
+		return;
+	}
+
+	/* ICU documents "a negative value" (not specifically -1) for "no mapping". */
+	if (decomposition_length < 0) {
+		RETURN_NULL();
+	}
+
+	/* NOTE(review): intl_convert_utf16_to_utf8() is assumed to return NULL on
+	 * failure with the cause left in status - confirm against its definition. */
+	result = intl_convert_utf16_to_utf8(decomposition, decomposition_length, &status);
+	if (result == NULL) {
+		intl_error_set_code(NULL, status);
+		intl_error_set_custom_msg(NULL, "Error converting the decomposition to UTF-8", 0);
+		return;
+	}
+
+	RETVAL_NEW_STR(result);
+}
+/* }}} */
+
/*
* Local variables:
* tab-width: 4
diff --git a/ext/intl/normalizer/normalizer_normalize.h b/ext/intl/normalizer/normalizer_normalize.h
index c282c56795..f8d0321777 100644
--- a/ext/intl/normalizer/normalizer_normalize.h
+++ b/ext/intl/normalizer/normalizer_normalize.h
@@ -21,5 +21,6 @@
PHP_FUNCTION( normalizer_normalize );
PHP_FUNCTION( normalizer_is_normalized );
+PHP_FUNCTION( normalizer_get_raw_decomposition );
#endif // NORMALIZER_NORMALIZE_H
diff --git a/ext/intl/php_intl.c b/ext/intl/php_intl.c
index 7b2f206e52..0afc96b081 100644
--- a/ext/intl/php_intl.c
+++ b/ext/intl/php_intl.c
@@ -206,6 +206,10 @@ ZEND_BEGIN_ARG_INFO_EX(normalizer_args, 0, 0, 1)
ZEND_ARG_INFO(0, form)
ZEND_END_ARG_INFO()
+/* Argument info for normalizer_get_raw_decomposition(): a single `input` argument. */
+ZEND_BEGIN_ARG_INFO_EX(decomposition_args, 0, 0, 1)
+	ZEND_ARG_INFO(0, input)
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_INFO_EX(grapheme_1_arg, 0, 0, 1)
ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()
@@ -662,6 +666,7 @@ static const zend_function_entry intl_functions[] = {
/* normalizer functions */
PHP_FE( normalizer_normalize, normalizer_args )
PHP_FE( normalizer_is_normalized, normalizer_args )
+ PHP_FE( normalizer_get_raw_decomposition, decomposition_args )
/* Locale functions */
PHP_NAMED_FE( locale_get_default, zif_locale_get_default, locale_0_args )
diff --git a/ext/intl/tests/normalizer_get_raw_decomposition.phpt b/ext/intl/tests/normalizer_get_raw_decomposition.phpt
new file mode 100644
index 0000000000..69eb01366b
--- /dev/null
+++ b/ext/intl/tests/normalizer_get_raw_decomposition.phpt
@@ -0,0 +1,67 @@
+--TEST--
+normalizer_get_raw_decomposition()
+--SKIPIF--
+<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
+--FILE--
+<?php
+
+/*
+ * Try getting raw decomposition mappings
+ * with procedural and class methods.
+ */
+
+/*
+ * Feeds a fixed list of inputs through ut_norm_get_raw_decomposition() and
+ * returns a textual report (hex of the input, hex of the mapping if any,
+ * and the intl error message/code observed after each call).
+ */
+function ut_main()
+{
+    // Per the expected output: a code point without a mapping, two code points
+    // with mappings, then three rejected inputs (empty string, two code
+    // points, and a byte that decodes to an out-of-range code point).
+    $inputs = [
+        "a",
+        "\u{FFDA}",
+        "\u{FDFA}",
+        "",
+        "aa",
+        "\xF5",
+    ];
+
+    $result = '';
+    foreach ($inputs as $input) {
+        $mapping = ut_norm_get_raw_decomposition($input);
+        // Read the error state immediately after the call under test.
+        $error_code = intl_get_error_code();
+        $error_message = intl_get_error_message();
+        $string_hex = bin2hex($input);
+
+        $result .= "---------------------\n";
+        $result .= ($mapping === null)
+            ? "'$string_hex' has no decomposition mapping\n"
+            : "'$string_hex' has the decomposition mapping '" . bin2hex($mapping) . "'\n";
+        $result .= "error info: '$error_message' ($error_code)\n";
+    }
+
+    return $result;
+}
+
+include_once( 'ut_common.inc' );
+ut_run();
+
+?>
+--EXPECT--
+---------------------
+'61' has no decomposition mapping
+error info: 'U_ZERO_ERROR' (0)
+---------------------
+'efbf9a' has the decomposition mapping 'e385a1'
+error info: 'U_ZERO_ERROR' (0)
+---------------------
+'efb7ba' has the decomposition mapping 'd8b5d984d98920d8a7d984d984d98720d8b9d984d98ad98720d988d8b3d984d985'
+error info: 'U_ZERO_ERROR' (0)
+---------------------
+'' has no decomposition mapping
+error info: 'Input string must be exactly one UTF-8 encoded code point long.: U_ILLEGAL_ARGUMENT_ERROR' (1)
+---------------------
+'6161' has no decomposition mapping
+error info: 'Input string must be exactly one UTF-8 encoded code point long.: U_ILLEGAL_ARGUMENT_ERROR' (1)
+---------------------
+'f5' has no decomposition mapping
+error info: 'Code point out of range: U_ILLEGAL_ARGUMENT_ERROR' (1)
+
diff --git a/ext/intl/tests/ut_common.inc b/ext/intl/tests/ut_common.inc
index 1b82cfa56c..7ca3bc687a 100644
--- a/ext/intl/tests/ut_common.inc
+++ b/ext/intl/tests/ut_common.inc
@@ -219,6 +219,10 @@ function ut_norm_is_normalized( $str, $form )
{
return $GLOBALS['oo-mode'] ? Normalizer::isNormalized( $str, $form ) : normalizer_is_normalized( $str, $form );
}
+/* Calls the raw-decomposition API through the class method or the procedural function, depending on $GLOBALS['oo-mode']. */
+function ut_norm_get_raw_decomposition( $str )
+{
+    if ( $GLOBALS['oo-mode'] ) {
+        return Normalizer::getRawDecomposition( $str );
+    }
+
+    return normalizer_get_raw_decomposition( $str );
+}
/*
* Wrappers around Collator methods to run them in either OO- or procedural mode.