diff options
-rw-r--r-- | NEWS | 1 | ||||
-rwxr-xr-x | ext/intl/common/common_error.c | 3 | ||||
-rw-r--r-- | ext/intl/idn/idn.c | 233 | ||||
-rwxr-xr-x | ext/intl/php_intl.c | 6 | ||||
-rw-r--r-- | ext/intl/tests/idn_uts46_basic.phpt | 53 | ||||
-rw-r--r-- | ext/intl/tests/idn_uts46_errors.phpt | 89 |
6 files changed, 364 insertions, 21 deletions
@@ -39,6 +39,7 @@ PHP NEWS - Intl: . Fixed memory leak in several Intl locale functions. (Felipe) + . Added support for UTS #46. (Gustavo) - Mbstring . Fixed bug #60306 (Characters lost while converting from cp936 to utf8). diff --git a/ext/intl/common/common_error.c b/ext/intl/common/common_error.c index 3ab7fdfbdd..14d9cebfe7 100755 --- a/ext/intl/common/common_error.c +++ b/ext/intl/common/common_error.c @@ -232,7 +232,6 @@ void intl_expose_icu_error_codes( INIT_FUNC_ARGS ) INTL_EXPOSE_CONST( U_REGEX_ERROR_LIMIT ); /* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes */ -#if defined(U_IDNA_PROHIBITED_ERROR) INTL_EXPOSE_CONST( U_IDNA_PROHIBITED_ERROR ); INTL_EXPOSE_CONST( U_IDNA_ERROR_START ); INTL_EXPOSE_CONST( U_IDNA_UNASSIGNED_ERROR ); @@ -242,8 +241,8 @@ void intl_expose_icu_error_codes( INIT_FUNC_ARGS ) INTL_EXPOSE_CONST( U_IDNA_VERIFICATION_ERROR ); INTL_EXPOSE_CONST( U_IDNA_LABEL_TOO_LONG_ERROR ); INTL_EXPOSE_CONST( U_IDNA_ZERO_LENGTH_LABEL_ERROR ); + INTL_EXPOSE_CONST( U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR ); INTL_EXPOSE_CONST( U_IDNA_ERROR_LIMIT ); -#endif /* Aliases for StringPrep */ INTL_EXPOSE_CONST( U_STRINGPREP_PROHIBITED_ERROR ); diff --git a/ext/intl/idn/idn.c b/ext/intl/idn/idn.c index 23cd0ea872..833718e75b 100644 --- a/ext/intl/idn/idn.c +++ b/ext/intl/idn/idn.c @@ -29,14 +29,25 @@ #include "ext/standard/php_string.h" #include "intl_error.h" - #include "intl_convert.h" +#include "intl_convert.h" /* }}} */ +#ifdef UIDNA_INFO_INITIALIZER +#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */ +#endif + +enum { + INTL_IDN_VARIANT_2003 = 0, + INTL_IDN_VARIANT_UTS46 +}; + /* {{{ grapheme_register_constants * Register API constants */ void idn_register_constants( INIT_FUNC_ARGS ) { + /* OPTIONS */ + /* Option to prohibit processing of unassigned codepoints in the input and do not check if the input conforms to STD-3 ASCII rules. */ REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT); @@ -46,6 +57,50 @@ void idn_register_constants( INIT_FUNC_ARGS ) /* Option to check if input conforms to STD-3 ASCII rules */ REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT); + +#ifdef HAVE_46_API + + /* Option to check for whether the input conforms to the BiDi rules. + * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */ + REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT); + + /* Option to check for whether the input conforms to the CONTEXTJ rules. + * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */ + REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT); + + /* Option for nontransitional processing in ToASCII(). + * By default, ToASCII() uses transitional processing. + * Ignored by the IDNA2003 implementation. */ + REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT); + + /* Option for nontransitional processing in ToUnicode(). + * By default, ToUnicode() uses transitional processing. + * Ignored by the IDNA2003 implementation. */ + REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT); +#endif + + /* VARIANTS */ + REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT); +#ifdef HAVE_46_API + REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT); +#endif + +#ifdef HAVE_46_API + /* PINFO ERROR CODES */ + REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT); +#endif } /* }}} */ @@ -54,11 +109,100 @@ enum { INTL_IDN_TO_UTF8 }; -static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode) +/* like INTL_CHECK_STATUS, but as a function and varying the name of the func */ +static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC) +{ + intl_error_set_code(NULL, err TSRMLS_CC); + if (U_FAILURE(err)) { + char *buff; + spprintf(&buff, 0, "%s: %s", + mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8", + msg); + intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC); + efree(buff); + return FAILURE; + } + + return SUCCESS; +} + +static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC) +{ + php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC); +} + +#ifdef HAVE_46_API +static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS, + const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info) +{ + UErrorCode status = U_ZERO_ERROR; + UIDNA *uts46; + int32_t len; + int32_t buffer_capac = 255; /* no domain name may exceed this */ + char *buffer = emalloc(buffer_capac); + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + int buffer_used = 0; + + uts46 = uidna_openUTS46(option, &status); + if (php_intl_idn_check_status(status, "failed to open UIDNA instance", + mode TSRMLS_CC) == FAILURE) { + efree(buffer); + RETURN_FALSE; + } + + if (mode == INTL_IDN_TO_ASCII) { + len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len, + buffer, buffer_capac, &info, &status); + } else { + len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len, + buffer, buffer_capac, &info, &status); + } + if (php_intl_idn_check_status(status, "failed to convert name", + mode TSRMLS_CC) == FAILURE) { + uidna_close(uts46); + efree(buffer); + RETURN_FALSE; + } + if (len >= 255) { + php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length"); + } + + buffer[len] = '\0'; + + if (info.errors == 0) { + RETVAL_STRINGL(buffer, len, 0); + buffer_used = 1; + } else { + RETVAL_FALSE; + } + + if (idna_info) { + if (buffer_used) { /* used in return_value then */ + zval_addref_p(return_value); + add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value); + } else { + zval *zv; + ALLOC_INIT_ZVAL(zv); + ZVAL_STRINGL(zv, buffer, len, 0); + buffer_used = 1; + add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv); + } + add_assoc_bool_ex(idna_info, "isTransitionalDifferent", + sizeof("isTransitionalDifferent"), info.isTransitionalDifferent); + add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors); + } + + if (!buffer_used) { + efree(buffer); + } + + uidna_close(uts46); +} +#endif + +static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, + const char *domain, int domain_len, uint32_t option, int mode) { - unsigned char* domain; - int domain_len; - long option = 0; UChar* ustring = NULL; int ustring_len = 0; UErrorCode status; @@ -67,18 +211,9 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode) UChar converted[MAXPATHLEN]; int32_t converted_ret_len; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) { - return; - } - - if (domain_len < 1) { - intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC ); - RETURN_FALSE; - } - /* convert the string to UTF-16. */ status = U_ZERO_ERROR; - intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status ); + intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status); if (U_FAILURE(status)) { intl_error_set_code(NULL, status TSRMLS_CC); @@ -123,11 +258,75 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode) RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0); } +static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode) +{ + char *domain; + int domain_len; + long option = 0, + variant = INTL_IDN_VARIANT_2003; + zval *idna_info = NULL; + + intl_error_reset(NULL TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz", + &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) { + php_intl_bad_args("bad arguments", mode TSRMLS_CC); + RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */ + } + +#ifdef HAVE_46_API + if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) { + php_intl_bad_args("invalid variant, must be one of {" + "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC); + RETURN_FALSE; + } +#else + if (variant != INTL_IDN_VARIANT_2003) { + php_intl_bad_args("invalid variant, PHP was compiled against " + "an old version of ICU and only supports INTL_IDN_VARIANT_2003", + mode TSRMLS_CC); + RETURN_FALSE; + } +#endif + + if (domain_len < 1) { + php_intl_bad_args("empty domain name", mode TSRMLS_CC); + RETURN_FALSE; + } + if (domain_len > INT32_MAX - 1) { + php_intl_bad_args("domain name too large", mode TSRMLS_CC); + RETURN_FALSE; + } + /* don't check options; it wasn't checked before */ + + if (idna_info != NULL) { + if (variant == INTL_IDN_VARIANT_2003) { + php_error_docref0(NULL TSRMLS_CC, E_NOTICE, + "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only " + "takes 3 - extra argument ignored"); + } else { + zval_dtor(idna_info); + array_init(idna_info); + } + } + + if (variant == INTL_IDN_VARIANT_2003) { + php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, + domain, domain_len, (uint32_t)option, mode); + } +#ifdef HAVE_46_API + else { + php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len, + (uint32_t)option, mode, idna_info); + } +#endif +} + /* {{{ proto int idn_to_ascii(string domain[, int options]) Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */ PHP_FUNCTION(idn_to_ascii) { - php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII); + php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII); } /* }}} */ @@ -136,7 +335,7 @@ PHP_FUNCTION(idn_to_ascii) Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */ PHP_FUNCTION(idn_to_utf8) { - php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8); + php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8); } /* }}} */ diff --git a/ext/intl/php_intl.c b/ext/intl/php_intl.c index de5226b3d8..efe0ddd242 100755 --- a/ext/intl/php_intl.c +++ b/ext/intl/php_intl.c @@ -335,13 +335,15 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_ascii, 0, 0, 1) ZEND_ARG_INFO(0, domain) ZEND_ARG_INFO(0, option) - ZEND_ARG_INFO(0, status) + ZEND_ARG_INFO(0, variant) + ZEND_ARG_INFO(1, idn_info) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_utf8, 0, 0, 1) ZEND_ARG_INFO(0, domain) ZEND_ARG_INFO(0, option) - ZEND_ARG_INFO(0, status) + ZEND_ARG_INFO(0, variant) + ZEND_ARG_INFO(1, idn_info) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX( arginfo_resourcebundle_create_proc, 0, 0, 2 ) diff --git a/ext/intl/tests/idn_uts46_basic.phpt b/ext/intl/tests/idn_uts46_basic.phpt new file mode 100644 index 0000000000..2ca185092d --- /dev/null +++ b/ext/intl/tests/idn_uts46_basic.phpt @@ -0,0 +1,53 @@ +--TEST-- +IDN UTS #46 API basic tests +--SKIPIF-- +<?php + if (!extension_loaded('intl')) + die('skip'); + if (!defined('INTL_IDNA_VARIANT_UTS46')) + die('skip no UTS #46 API'); +--FILE-- +<?php +$utf8dn = "www.fußball.com"; +$asciiNonTrans = "www.xn--fuball-cta.com"; + +echo "all ok, no details:", "\n"; +var_dump(idn_to_ascii($utf8dn, + IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46)); + +echo "all ok, no details, transitional:", "\n"; +var_dump(idn_to_ascii($utf8dn, 0, INTL_IDNA_VARIANT_UTS46)); + +echo "all ok, with details:", "\n"; +var_dump(idn_to_ascii($utf8dn, IDNA_NONTRANSITIONAL_TO_ASCII, + INTL_IDNA_VARIANT_UTS46, $info)); +var_dump($info); + +echo "reverse, ok, with details:", "\n"; +var_dump(idn_to_utf8($asciiNonTrans, 0, INTL_IDNA_VARIANT_UTS46, $info)); +var_dump($info); +--EXPECT-- +all ok, no details: +string(22) "www.xn--fuball-cta.com" +all ok, no details, transitional: +string(16) "www.fussball.com" +all ok, with details: +string(22) "www.xn--fuball-cta.com" +array(3) { + ["result"]=> + string(22) "www.xn--fuball-cta.com" + ["isTransitionalDifferent"]=> + bool(true) + ["errors"]=> + int(0) +} +reverse, ok, with details: +string(16) "www.fußball.com" +array(3) { + ["result"]=> + string(16) "www.fußball.com" + ["isTransitionalDifferent"]=> + bool(false) + ["errors"]=> + int(0) +} diff --git a/ext/intl/tests/idn_uts46_errors.phpt b/ext/intl/tests/idn_uts46_errors.phpt new file mode 100644 index 0000000000..a336e698c4 --- /dev/null +++ b/ext/intl/tests/idn_uts46_errors.phpt @@ -0,0 +1,89 @@ +--TEST-- +IDN UTS #46 API error tests +--SKIPIF-- +<?php + if (!extension_loaded('intl')) + die('skip'); + if (!defined('INTL_IDNA_VARIANT_UTS46')) + die('skip no UTS #46 API'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +echo "=> PHP level errors", "\n"; + +echo "bad args:", "\n"; +var_dump(idn_to_ascii("", 0, array())); +var_dump(idn_to_ascii("", 0, INTL_IDNA_VARIANT_UTS46, $foo, null)); + +echo "bad variant:", "\n"; +var_dump(idn_to_ascii("", 0, INTL_IDNA_VARIANT_UTS46 + 10)); + +echo "empty domain:", "\n"; +var_dump(idn_to_ascii("", 0, INTL_IDNA_VARIANT_UTS46)); + +echo "fourth arg for 2003 variant (only notice raised):", "\n"; +var_dump(idn_to_ascii("foo.com", 0, INTL_IDNA_VARIANT_2003, $foo)); + +echo "with error, but no details arg:", "\n"; +var_dump(idn_to_ascii("www.fußball.com-", 0, INTL_IDNA_VARIANT_UTS46)); + +echo "with error, with details arg:", "\n"; +var_dump(idn_to_ascii("www.fußball.com-", IDNA_NONTRANSITIONAL_TO_ASCII, + INTL_IDNA_VARIANT_UTS46, $foo)); +var_dump($foo); + +echo "with error, with details arg, contextj:", "\n"; +var_dump(idn_to_ascii( + html_entity_decode("www.a‍b.com", 0, "UTF-8"), + IDNA_NONTRANSITIONAL_TO_ASCII | IDNA_CHECK_CONTEXTJ, + INTL_IDNA_VARIANT_UTS46, $foo)); +var_dump($foo); +var_dump($foo["errors"]==IDNA_ERROR_CONTEXTJ); +--EXPECTF-- +=> PHP level errors +bad args: + +Warning: idn_to_ascii() expects parameter 3 to be long, array given in %s on line %d + +Warning: idn_to_ascii(): idn_to_ascii: bad arguments in %s on line %d +NULL + +Warning: idn_to_ascii() expects at most 4 parameters, 5 given in %s on line %d + +Warning: idn_to_ascii(): idn_to_ascii: bad arguments in %s on line %d +NULL +bad variant: + +Warning: idn_to_ascii(): idn_to_ascii: invalid variant, must be one of {INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46} in %s on line %d +bool(false) +empty domain: + +Warning: idn_to_ascii(): idn_to_ascii: empty domain name in %s on line %d +bool(false) +fourth arg for 2003 variant (only notice raised): + +Notice: idn_to_ascii(): 4 arguments were provided, but INTL_IDNA_VARIANT_2003 only takes 3 - extra argument ignored in %s on line %d +string(7) "foo.com" +with error, but no details arg: +bool(false) +with error, with details arg: +bool(false) +array(3) { + ["result"]=> + string(23) "www.xn--fuball-cta.com-" + ["isTransitionalDifferent"]=> + bool(true) + ["errors"]=> + int(16) +} +with error, with details arg, contextj: +bool(false) +array(3) { + ["result"]=> + string(18) "www.xn--ab-m1t.com" + ["isTransitionalDifferent"]=> + bool(true) + ["errors"]=> + int(4096) +} +bool(true) |