diff options
Diffstat (limited to 'ext/intl/msgformat/msgformat_helpers.cpp')
-rwxr-xr-x | ext/intl/msgformat/msgformat_helpers.cpp | 366 |
1 files changed, 216 insertions, 150 deletions
diff --git a/ext/intl/msgformat/msgformat_helpers.cpp b/ext/intl/msgformat/msgformat_helpers.cpp index 7a2770133e..b13c4c466f 100755 --- a/ext/intl/msgformat/msgformat_helpers.cpp +++ b/ext/intl/msgformat/msgformat_helpers.cpp @@ -135,12 +135,12 @@ static double umsg_helper_zval_to_millis(zval *z, UErrorCode *status TSRMLS_DC) static HashTable *umsg_parse_format(MessageFormatter_object *mfo, const MessagePattern& mp, - UErrorCode& uec) + intl_error& err TSRMLS_DC) { HashTable *ret; int32_t parts_count; - if (U_FAILURE(uec)) { + if (U_FAILURE(err.code)) { return NULL; } @@ -167,33 +167,35 @@ static HashTable *umsg_parse_format(MessageFormatter_object *mfo, */ /* The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT * which we need not examine. */ - for (int32_t i = 0; i < parts_count - 2 && U_SUCCESS(uec); i++) { - MessagePattern::Part p = mp.getPart(i); + for (int32_t i = 0; i < parts_count - 2 && U_SUCCESS(err.code); i++) { + MessagePattern::Part p = mp.getPart(i); - if (p.getType() != UMSGPAT_PART_TYPE_ARG_START) { + if (p.getType() != UMSGPAT_PART_TYPE_ARG_START) { continue; } - MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */ + MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */ Formattable::Type type, *storedType; - if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { - UnicodeString argName = mp.getSubstring(name_part); + if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { + UnicodeString argName = mp.getSubstring(name_part); if (zend_hash_find(ret, (char*)argName.getBuffer(), argName.length(), (void**)&storedType) == FAILURE) { /* not found already; create new entry in HT */ Formattable::Type bogusType = Formattable::kObject; if (zend_hash_update(ret, (char*)argName.getBuffer(), argName.length(), (void*)&bogusType, sizeof(bogusType), (void**)&storedType) == FAILURE) { - uec = U_MEMORY_ALLOCATION_ERROR; + intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR, + "Write to argument types hash table failed", 0 TSRMLS_CC); continue; } } - } else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { - int32_t argNumber = name_part.getValue(); + } else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { + int32_t argNumber = name_part.getValue(); if (argNumber < 0) { - uec = U_INVALID_FORMAT_ERROR; + intl_errors_set(&err, U_INVALID_FORMAT_ERROR, + "Found part with negative number", 0 TSRMLS_CC); continue; } if (zend_hash_index_find(ret, (ulong)argNumber, (void**)&storedType) @@ -202,73 +204,79 @@ static HashTable *umsg_parse_format(MessageFormatter_object *mfo, Formattable::Type bogusType = Formattable::kObject; if (zend_hash_index_update(ret, (ulong)argNumber, (void*)&bogusType, sizeof(bogusType), (void**)&storedType) == FAILURE) { - uec = U_MEMORY_ALLOCATION_ERROR; + intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR, + "Write to argument types hash table failed", 0 TSRMLS_CC); continue; } } - } + } - UMessagePatternArgType argType = p.getArgType(); - /* No type specified, treat it as a string */ - if (argType == UMSGPAT_ARG_TYPE_NONE) { - type = Formattable::kString; + UMessagePatternArgType argType = p.getArgType(); + /* No type specified, treat it as a string */ + if (argType == UMSGPAT_ARG_TYPE_NONE) { + type = Formattable::kString; } else { /* Some type was specified, might be simple or complicated */ - if (argType == UMSGPAT_ARG_TYPE_SIMPLE) { - /* For a SIMPLE arg, after the name part, there should be - * an ARG_TYPE part whose string value tells us what to do */ - MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */ - if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) { - UnicodeString typeString = mp.getSubstring(type_part); - /* This is all based on the rules in the docs for MessageFormat - * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */ - if (typeString == "number") { - MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */ - if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { - UnicodeString styleString = mp.getSubstring(style_part); - if (styleString == "integer") { - type = Formattable::kInt64; - } else if (styleString == "currency") { - type = Formattable::kDouble; - } else if (styleString == "percent") { - type = Formattable::kDouble; - } - } else { // if missing style, part, make it a double - type = Formattable::kDouble; - } - } else if ((typeString == "date") || (typeString == "time")) { - type = Formattable::kDate; - } else if ((typeString == "spellout") || (typeString == "ordinal") + if (argType == UMSGPAT_ARG_TYPE_SIMPLE) { + /* For a SIMPLE arg, after the name part, there should be + * an ARG_TYPE part whose string value tells us what to do */ + MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */ + if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) { + UnicodeString typeString = mp.getSubstring(type_part); + /* This is all based on the rules in the docs for MessageFormat + * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */ + if (typeString == "number") { + MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */ + if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { + UnicodeString styleString = mp.getSubstring(style_part); + if (styleString == "integer") { + type = Formattable::kInt64; + } else if (styleString == "currency") { + type = Formattable::kDouble; + } else if (styleString == "percent") { + type = Formattable::kDouble; + } else { /* some style invalid/unknown to us */ + type = Formattable::kDouble; + } + } else { // if missing style, part, make it a double + type = Formattable::kDouble; + } + } else if ((typeString == "date") || (typeString == "time")) { + type = Formattable::kDate; + } else if ((typeString == "spellout") || (typeString == "ordinal") || (typeString == "duration")) { - type = Formattable::kDouble; - } - } else { - /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a - * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern - * is broken. */ - uec = U_PARSE_ERROR; - continue; - } - } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) { - type = Formattable::kDouble; - } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { - type = Formattable::kDouble; - } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { - type = Formattable::kString; - } else { - type = Formattable::kString; - } - } /* was type specified? */ + type = Formattable::kDouble; + } + } else { + /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a + * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern + * is broken. */ + intl_errors_set(&err, U_PARSE_ERROR, + "Expected UMSGPAT_PART_TYPE_ARG_TYPE part following " + "UMSGPAT_ARG_TYPE_SIMPLE part", 0 TSRMLS_CC); + continue; + } + } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) { + type = Formattable::kDouble; + } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { + type = Formattable::kDouble; + } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { + type = Formattable::kString; + } else { + type = Formattable::kString; + } + } /* was type specified? */ /* We found a different type for the same arg! */ if (*storedType != Formattable::kObject && *storedType != type) { - uec = U_ARGUMENT_TYPE_MISMATCH; + intl_errors_set(&err, U_ARGUMENT_TYPE_MISMATCH, + "Inconsistent types declared for an argument", 0 TSRMLS_CC); continue; } *storedType = type; } /* visiting each part */ - if (U_FAILURE(uec)) { + if (U_FAILURE(err.code)) { zend_hash_destroy(ret); efree(ret); @@ -280,22 +288,27 @@ static HashTable *umsg_parse_format(MessageFormatter_object *mfo, return ret; } -U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC) +U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, + HashTable *args, + UChar **formatted, + int *formatted_len TSRMLS_DC) { int arg_count = zend_hash_num_elements(args); std::vector<Formattable> fargs; std::vector<UnicodeString> farg_names; MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf; - const MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf); + const MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf); HashTable *types; + intl_error& err = INTL_DATA_ERROR(mfo); + + if (U_FAILURE(err.code)) { + return; + } fargs.resize(arg_count); farg_names.resize(arg_count); - types = umsg_parse_format(mfo, mp, *status); - if (U_FAILURE(*status)) { - return; - } + types = umsg_parse_format(mfo, mp, err TSRMLS_CC); int argNum = 0; HashPosition pos; @@ -308,7 +321,7 @@ U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, U ulong num_index; for (zend_hash_internal_pointer_reset_ex(args, &pos); - U_SUCCESS(*status) && + U_SUCCESS(err.code) && (key_type = zend_hash_get_current_key_ex( args, &str_index, &str_len, &num_index, 0, &pos), zend_hash_get_current_data_ex(args, (void **)&elem, &pos) @@ -323,20 +336,27 @@ U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, U /* Process key and retrieve type */ if (key_type == HASH_KEY_IS_LONG) { /* includes case where index < 0 because it's exposed as unsigned */ - if (num_index > INT32_MAX) { - *status = U_ILLEGAL_ARGUMENT_ERROR; + if (num_index > (ulong)INT32_MAX) { + intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR, + "Found negative or too large array key", 0 TSRMLS_CC); continue; } - UChar temp[16]; + UChar temp[16]; int32_t len = u_sprintf(temp, "%u", (uint32_t)num_index); - key.append(temp, len); + key.append(temp, len); zend_hash_index_find(types, (ulong)num_index, (void**)&storedArgType); } else { //string; assumed to be in UTF-8 - intl_stringFromChar(key, str_index, str_len-1, status); - if (U_FAILURE(*status)) { - continue; + intl_stringFromChar(key, str_index, str_len-1, &err.code); + + if (U_FAILURE(err.code)) { + char *message; + spprintf(&message, 0, + "Invalid UTF-8 data in argument key: '%s'", str_index); + intl_errors_set(&err, err.code, message, 1 TSRMLS_CC); + efree(message); + continue; } zend_hash_find(types, (char*)key.getBuffer(), key.length(), @@ -350,107 +370,153 @@ U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, U /* Convert zval to formattable according to message format type * or (as a fallback) the zval type */ if (argType != Formattable::kObject) { - switch (argType) { + switch (argType) { case Formattable::kString: { -string_arg: //XXX: make function - /* This implicitly converts objects */ + string_arg: + /* This implicitly converts objects + * Note that our vectors will leak if object conversion fails + * and PHP ends up with a fatal error and calls longjmp + * as a result of that. + */ convert_to_string_ex(elem); UnicodeString *text = new UnicodeString(); - intl_stringFromChar(*text, Z_STRVAL_PP(elem), Z_STRLEN_PP(elem), status); - if (U_FAILURE(*status)) { + intl_stringFromChar(*text, + Z_STRVAL_PP(elem), Z_STRLEN_PP(elem), &err.code); + + if (U_FAILURE(err.code)) { + char *message; + spprintf(&message, 0, "Invalid UTF-8 data in string argument: " + "'%s'", Z_STRVAL_PP(elem)); + intl_errors_set(&err, err.code, message, 1 TSRMLS_CC); + efree(message); delete text; continue; } formattable.adoptString(text); break; } - case Formattable::kDouble: - { - double d; - if (Z_TYPE_PP(elem) == IS_DOUBLE) { - d = Z_DVAL_PP(elem); - } else if (Z_TYPE_PP(elem) == IS_LONG) { - d = (double)Z_LVAL_PP(elem); - } else { - SEPARATE_ZVAL_IF_NOT_REF(elem); - convert_scalar_to_number(*elem TSRMLS_CC); - d = (Z_TYPE_PP(elem) == IS_DOUBLE) + case Formattable::kDouble: + { + double d; + if (Z_TYPE_PP(elem) == IS_DOUBLE) { + d = Z_DVAL_PP(elem); + } else if (Z_TYPE_PP(elem) == IS_LONG) { + d = (double)Z_LVAL_PP(elem); + } else { + SEPARATE_ZVAL_IF_NOT_REF(elem); + convert_scalar_to_number(*elem TSRMLS_CC); + d = (Z_TYPE_PP(elem) == IS_DOUBLE) ? Z_DVAL_PP(elem) : (double)Z_LVAL_PP(elem); - } + } formattable.setDouble(d); - break; - } - case Formattable::kInt64: - { - int64_t tInt64; - if (Z_TYPE_PP(elem) == IS_DOUBLE) { - tInt64 = (int64_t)Z_DVAL_PP(elem); - } else if (Z_TYPE_PP(elem) == IS_LONG) { - tInt64 = (int64_t)Z_LVAL_PP(elem); - } else { - SEPARATE_ZVAL_IF_NOT_REF(elem); - convert_scalar_to_number(*elem TSRMLS_CC); - tInt64 = (Z_TYPE_PP(elem) == IS_DOUBLE) + break; + } + case Formattable::kInt64: + { + int64_t tInt64; + if (Z_TYPE_PP(elem) == IS_DOUBLE) { + tInt64 = (int64_t)Z_DVAL_PP(elem); + } else if (Z_TYPE_PP(elem) == IS_LONG) { + tInt64 = (int64_t)Z_LVAL_PP(elem); + } else { + SEPARATE_ZVAL_IF_NOT_REF(elem); + convert_scalar_to_number(*elem TSRMLS_CC); + tInt64 = (Z_TYPE_PP(elem) == IS_DOUBLE) ? (int64_t)Z_DVAL_PP(elem) : Z_LVAL_PP(elem); - } - formattable.setInt64(tInt64); - break; - } - case Formattable::kDate: - { - double dd = umsg_helper_zval_to_millis(*elem, status TSRMLS_CC); - if (U_FAILURE(*status)) { + } + formattable.setInt64(tInt64); + break; + } + case Formattable::kDate: + { + double dd = umsg_helper_zval_to_millis(*elem, &err.code TSRMLS_CC); + if (U_FAILURE(err.code)) { + char *message, *key_char; + int key_len; + UErrorCode status = UErrorCode(); + if (intl_charFromString(key, &key_char, &key_len, + &status) == SUCCESS) { + spprintf(&message, 0, "The argument for key '%s' " + "cannot be used as a date or time", key_char); + intl_errors_set(&err, err.code, message, 1 TSRMLS_CC); + efree(key_char); + efree(message); + } continue; } - formattable.setDate(dd); - break; - } - } - } else { - /* We couldn't find any information about the argument in the pattern, this - * means it's an extra argument. So convert it to a number if it's a number or - * bool or null and to a string if it's anything else. */ - switch (Z_TYPE_PP(elem)) { - case IS_DOUBLE: - formattable.setDouble(Z_DVAL_PP(elem)); - break; - case IS_BOOL: - convert_to_long_ex(elem); - /* Intentional fallthrough */ - case IS_LONG: - formattable.setInt64((int64_t)Z_LVAL_PP(elem)); - break; - case IS_NULL: - formattable.setInt64((int64_t)0); - break; - default: - goto string_arg; - } + formattable.setDate(dd); + break; + } + } + } else { + /* We couldn't find any information about the argument in the pattern, this + * means it's an extra argument. So convert it to a number if it's a number or + * bool or null and to a string if it's anything else except arrays . */ + switch (Z_TYPE_PP(elem)) { + case IS_DOUBLE: + formattable.setDouble(Z_DVAL_PP(elem)); + break; + case IS_BOOL: + convert_to_long_ex(elem); + /* Intentional fallthrough */ + case IS_LONG: + formattable.setInt64((int64_t)Z_LVAL_PP(elem)); + break; + case IS_NULL: + formattable.setInt64((int64_t)0); + break; + case IS_STRING: + case IS_OBJECT: + goto string_arg; + default: + { + char *message, *key_char; + int key_len; + UErrorCode status = UErrorCode(); + if (intl_charFromString(key, &key_char, &key_len, + &status) == SUCCESS) { + spprintf(&message, 0, "No strategy to convert the " + "value given for the argument with key '%s' " + "is available", key_char); + intl_errors_set(&err, + U_ILLEGAL_ARGUMENT_ERROR, message, 1 TSRMLS_CC); + efree(key_char); + efree(message); + } + } + } } - } // visiting each argument + } // visiting each argument - if (U_FAILURE(*status)){ - return; - } + if (U_FAILURE(err.code)) { + return; + } UnicodeString resultStr; FieldPosition fieldPosition(0); - /* format the message */ + /* format the message */ mf->format(farg_names.empty() ? NULL : &farg_names[0], - fargs.empty() ? NULL : &fargs[0], arg_count, resultStr, *status); + fargs.empty() ? NULL : &fargs[0], arg_count, resultStr, err.code); - if (U_FAILURE(*status)) { - return; - } + if (U_FAILURE(err.code)) { + intl_errors_set(&err, err.code, + "Call to ICU MessageFormat::format() has failed", 0 TSRMLS_CC); + return; + } *formatted_len = resultStr.length(); *formatted = eumalloc(*formatted_len+1); - resultStr.extract(*formatted, *formatted_len+1, *status); + resultStr.extract(*formatted, *formatted_len+1, err.code); + if (U_FAILURE(err.code)) { + intl_errors_set(&err, err.code, + "Error copying format() result", 0 TSRMLS_CC); + return; + } } #define cleanup_zvals() for(int j=i;j>=0;j--) { zval_ptr_dtor((*args)+i); } |