summaryrefslogtreecommitdiff
path: root/ext/intl/msgformat/msgformat_helpers.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ext/intl/msgformat/msgformat_helpers.cpp')
-rwxr-xr-xext/intl/msgformat/msgformat_helpers.cpp366
1 files changed, 216 insertions, 150 deletions
diff --git a/ext/intl/msgformat/msgformat_helpers.cpp b/ext/intl/msgformat/msgformat_helpers.cpp
index 7a2770133e..b13c4c466f 100755
--- a/ext/intl/msgformat/msgformat_helpers.cpp
+++ b/ext/intl/msgformat/msgformat_helpers.cpp
@@ -135,12 +135,12 @@ static double umsg_helper_zval_to_millis(zval *z, UErrorCode *status TSRMLS_DC)
static HashTable *umsg_parse_format(MessageFormatter_object *mfo,
const MessagePattern& mp,
- UErrorCode& uec)
+ intl_error& err TSRMLS_DC)
{
HashTable *ret;
int32_t parts_count;
- if (U_FAILURE(uec)) {
+ if (U_FAILURE(err.code)) {
return NULL;
}
@@ -167,33 +167,35 @@ static HashTable *umsg_parse_format(MessageFormatter_object *mfo,
*/
/* The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
* which we need not examine. */
- for (int32_t i = 0; i < parts_count - 2 && U_SUCCESS(uec); i++) {
- MessagePattern::Part p = mp.getPart(i);
+ for (int32_t i = 0; i < parts_count - 2 && U_SUCCESS(err.code); i++) {
+ MessagePattern::Part p = mp.getPart(i);
- if (p.getType() != UMSGPAT_PART_TYPE_ARG_START) {
+ if (p.getType() != UMSGPAT_PART_TYPE_ARG_START) {
continue;
}
- MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */
+ MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */
Formattable::Type type,
*storedType;
- if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
- UnicodeString argName = mp.getSubstring(name_part);
+ if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
+ UnicodeString argName = mp.getSubstring(name_part);
if (zend_hash_find(ret, (char*)argName.getBuffer(), argName.length(),
(void**)&storedType) == FAILURE) {
/* not found already; create new entry in HT */
Formattable::Type bogusType = Formattable::kObject;
if (zend_hash_update(ret, (char*)argName.getBuffer(), argName.length(),
(void*)&bogusType, sizeof(bogusType), (void**)&storedType) == FAILURE) {
- uec = U_MEMORY_ALLOCATION_ERROR;
+ intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR,
+ "Write to argument types hash table failed", 0 TSRMLS_CC);
continue;
}
}
- } else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
- int32_t argNumber = name_part.getValue();
+ } else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
+ int32_t argNumber = name_part.getValue();
if (argNumber < 0) {
- uec = U_INVALID_FORMAT_ERROR;
+ intl_errors_set(&err, U_INVALID_FORMAT_ERROR,
+ "Found part with negative number", 0 TSRMLS_CC);
continue;
}
if (zend_hash_index_find(ret, (ulong)argNumber, (void**)&storedType)
@@ -202,73 +204,79 @@ static HashTable *umsg_parse_format(MessageFormatter_object *mfo,
Formattable::Type bogusType = Formattable::kObject;
if (zend_hash_index_update(ret, (ulong)argNumber, (void*)&bogusType,
sizeof(bogusType), (void**)&storedType) == FAILURE) {
- uec = U_MEMORY_ALLOCATION_ERROR;
+ intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR,
+ "Write to argument types hash table failed", 0 TSRMLS_CC);
continue;
}
}
- }
+ }
- UMessagePatternArgType argType = p.getArgType();
- /* No type specified, treat it as a string */
- if (argType == UMSGPAT_ARG_TYPE_NONE) {
- type = Formattable::kString;
+ UMessagePatternArgType argType = p.getArgType();
+ /* No type specified, treat it as a string */
+ if (argType == UMSGPAT_ARG_TYPE_NONE) {
+ type = Formattable::kString;
} else { /* Some type was specified, might be simple or complicated */
- if (argType == UMSGPAT_ARG_TYPE_SIMPLE) {
- /* For a SIMPLE arg, after the name part, there should be
- * an ARG_TYPE part whose string value tells us what to do */
- MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */
- if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) {
- UnicodeString typeString = mp.getSubstring(type_part);
- /* This is all based on the rules in the docs for MessageFormat
- * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */
- if (typeString == "number") {
- MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */
- if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
- UnicodeString styleString = mp.getSubstring(style_part);
- if (styleString == "integer") {
- type = Formattable::kInt64;
- } else if (styleString == "currency") {
- type = Formattable::kDouble;
- } else if (styleString == "percent") {
- type = Formattable::kDouble;
- }
- } else { // if missing style, part, make it a double
- type = Formattable::kDouble;
- }
- } else if ((typeString == "date") || (typeString == "time")) {
- type = Formattable::kDate;
- } else if ((typeString == "spellout") || (typeString == "ordinal")
+ if (argType == UMSGPAT_ARG_TYPE_SIMPLE) {
+ /* For a SIMPLE arg, after the name part, there should be
+ * an ARG_TYPE part whose string value tells us what to do */
+ MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */
+ if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) {
+ UnicodeString typeString = mp.getSubstring(type_part);
+ /* This is all based on the rules in the docs for MessageFormat
+ * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */
+ if (typeString == "number") {
+ MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */
+ if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
+ UnicodeString styleString = mp.getSubstring(style_part);
+ if (styleString == "integer") {
+ type = Formattable::kInt64;
+ } else if (styleString == "currency") {
+ type = Formattable::kDouble;
+ } else if (styleString == "percent") {
+ type = Formattable::kDouble;
+ } else { /* some style invalid/unknown to us */
+ type = Formattable::kDouble;
+ }
+ } else { // if missing style, part, make it a double
+ type = Formattable::kDouble;
+ }
+ } else if ((typeString == "date") || (typeString == "time")) {
+ type = Formattable::kDate;
+ } else if ((typeString == "spellout") || (typeString == "ordinal")
|| (typeString == "duration")) {
- type = Formattable::kDouble;
- }
- } else {
- /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a
- * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern
- * is broken. */
- uec = U_PARSE_ERROR;
- continue;
- }
- } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) {
- type = Formattable::kDouble;
- } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
- type = Formattable::kDouble;
- } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
- type = Formattable::kString;
- } else {
- type = Formattable::kString;
- }
- } /* was type specified? */
+ type = Formattable::kDouble;
+ }
+ } else {
+ /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a
+ * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern
+ * is broken. */
+ intl_errors_set(&err, U_PARSE_ERROR,
+ "Expected UMSGPAT_PART_TYPE_ARG_TYPE part following "
+ "UMSGPAT_ARG_TYPE_SIMPLE part", 0 TSRMLS_CC);
+ continue;
+ }
+ } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) {
+ type = Formattable::kDouble;
+ } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
+ type = Formattable::kDouble;
+ } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
+ type = Formattable::kString;
+ } else {
+ type = Formattable::kString;
+ }
+ } /* was type specified? */
/* We found a different type for the same arg! */
if (*storedType != Formattable::kObject && *storedType != type) {
- uec = U_ARGUMENT_TYPE_MISMATCH;
+ intl_errors_set(&err, U_ARGUMENT_TYPE_MISMATCH,
+ "Inconsistent types declared for an argument", 0 TSRMLS_CC);
continue;
}
*storedType = type;
} /* visiting each part */
- if (U_FAILURE(uec)) {
+ if (U_FAILURE(err.code)) {
zend_hash_destroy(ret);
efree(ret);
@@ -280,22 +288,27 @@ static HashTable *umsg_parse_format(MessageFormatter_object *mfo,
return ret;
}
-U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC)
+U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo,
+ HashTable *args,
+ UChar **formatted,
+ int *formatted_len TSRMLS_DC)
{
int arg_count = zend_hash_num_elements(args);
std::vector<Formattable> fargs;
std::vector<UnicodeString> farg_names;
MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf;
- const MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf);
+ const MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf);
HashTable *types;
+ intl_error& err = INTL_DATA_ERROR(mfo);
+
+ if (U_FAILURE(err.code)) {
+ return;
+ }
fargs.resize(arg_count);
farg_names.resize(arg_count);
- types = umsg_parse_format(mfo, mp, *status);
- if (U_FAILURE(*status)) {
- return;
- }
+ types = umsg_parse_format(mfo, mp, err TSRMLS_CC);
int argNum = 0;
HashPosition pos;
@@ -308,7 +321,7 @@ U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, U
ulong num_index;
for (zend_hash_internal_pointer_reset_ex(args, &pos);
- U_SUCCESS(*status) &&
+ U_SUCCESS(err.code) &&
(key_type = zend_hash_get_current_key_ex(
args, &str_index, &str_len, &num_index, 0, &pos),
zend_hash_get_current_data_ex(args, (void **)&elem, &pos)
@@ -323,20 +336,27 @@ U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, U
/* Process key and retrieve type */
if (key_type == HASH_KEY_IS_LONG) {
/* includes case where index < 0 because it's exposed as unsigned */
- if (num_index > INT32_MAX) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
+ if (num_index > (ulong)INT32_MAX) {
+ intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR,
+ "Found negative or too large array key", 0 TSRMLS_CC);
continue;
}
- UChar temp[16];
+ UChar temp[16];
int32_t len = u_sprintf(temp, "%u", (uint32_t)num_index);
- key.append(temp, len);
+ key.append(temp, len);
zend_hash_index_find(types, (ulong)num_index, (void**)&storedArgType);
} else { //string; assumed to be in UTF-8
- intl_stringFromChar(key, str_index, str_len-1, status);
- if (U_FAILURE(*status)) {
- continue;
+ intl_stringFromChar(key, str_index, str_len-1, &err.code);
+
+ if (U_FAILURE(err.code)) {
+ char *message;
+ spprintf(&message, 0,
+ "Invalid UTF-8 data in argument key: '%s'", str_index);
+ intl_errors_set(&err, err.code, message, 1 TSRMLS_CC);
+ efree(message);
+ continue;
}
zend_hash_find(types, (char*)key.getBuffer(), key.length(),
@@ -350,107 +370,153 @@ U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, U
/* Convert zval to formattable according to message format type
* or (as a fallback) the zval type */
if (argType != Formattable::kObject) {
- switch (argType) {
+ switch (argType) {
case Formattable::kString:
{
-string_arg: //XXX: make function
- /* This implicitly converts objects */
+ string_arg:
+ /* This implicitly converts objects
+ * Note that our vectors will leak if object conversion fails
+ * and PHP ends up with a fatal error and calls longjmp
+ * as a result of that.
+ */
convert_to_string_ex(elem);
UnicodeString *text = new UnicodeString();
- intl_stringFromChar(*text, Z_STRVAL_PP(elem), Z_STRLEN_PP(elem), status);
- if (U_FAILURE(*status)) {
+ intl_stringFromChar(*text,
+ Z_STRVAL_PP(elem), Z_STRLEN_PP(elem), &err.code);
+
+ if (U_FAILURE(err.code)) {
+ char *message;
+ spprintf(&message, 0, "Invalid UTF-8 data in string argument: "
+ "'%s'", Z_STRVAL_PP(elem));
+ intl_errors_set(&err, err.code, message, 1 TSRMLS_CC);
+ efree(message);
delete text;
continue;
}
formattable.adoptString(text);
break;
}
- case Formattable::kDouble:
- {
- double d;
- if (Z_TYPE_PP(elem) == IS_DOUBLE) {
- d = Z_DVAL_PP(elem);
- } else if (Z_TYPE_PP(elem) == IS_LONG) {
- d = (double)Z_LVAL_PP(elem);
- } else {
- SEPARATE_ZVAL_IF_NOT_REF(elem);
- convert_scalar_to_number(*elem TSRMLS_CC);
- d = (Z_TYPE_PP(elem) == IS_DOUBLE)
+ case Formattable::kDouble:
+ {
+ double d;
+ if (Z_TYPE_PP(elem) == IS_DOUBLE) {
+ d = Z_DVAL_PP(elem);
+ } else if (Z_TYPE_PP(elem) == IS_LONG) {
+ d = (double)Z_LVAL_PP(elem);
+ } else {
+ SEPARATE_ZVAL_IF_NOT_REF(elem);
+ convert_scalar_to_number(*elem TSRMLS_CC);
+ d = (Z_TYPE_PP(elem) == IS_DOUBLE)
? Z_DVAL_PP(elem)
: (double)Z_LVAL_PP(elem);
- }
+ }
formattable.setDouble(d);
- break;
- }
- case Formattable::kInt64:
- {
- int64_t tInt64;
- if (Z_TYPE_PP(elem) == IS_DOUBLE) {
- tInt64 = (int64_t)Z_DVAL_PP(elem);
- } else if (Z_TYPE_PP(elem) == IS_LONG) {
- tInt64 = (int64_t)Z_LVAL_PP(elem);
- } else {
- SEPARATE_ZVAL_IF_NOT_REF(elem);
- convert_scalar_to_number(*elem TSRMLS_CC);
- tInt64 = (Z_TYPE_PP(elem) == IS_DOUBLE)
+ break;
+ }
+ case Formattable::kInt64:
+ {
+ int64_t tInt64;
+ if (Z_TYPE_PP(elem) == IS_DOUBLE) {
+ tInt64 = (int64_t)Z_DVAL_PP(elem);
+ } else if (Z_TYPE_PP(elem) == IS_LONG) {
+ tInt64 = (int64_t)Z_LVAL_PP(elem);
+ } else {
+ SEPARATE_ZVAL_IF_NOT_REF(elem);
+ convert_scalar_to_number(*elem TSRMLS_CC);
+ tInt64 = (Z_TYPE_PP(elem) == IS_DOUBLE)
? (int64_t)Z_DVAL_PP(elem)
: Z_LVAL_PP(elem);
- }
- formattable.setInt64(tInt64);
- break;
- }
- case Formattable::kDate:
- {
- double dd = umsg_helper_zval_to_millis(*elem, status TSRMLS_CC);
- if (U_FAILURE(*status)) {
+ }
+ formattable.setInt64(tInt64);
+ break;
+ }
+ case Formattable::kDate:
+ {
+ double dd = umsg_helper_zval_to_millis(*elem, &err.code TSRMLS_CC);
+ if (U_FAILURE(err.code)) {
+ char *message, *key_char;
+ int key_len;
+ UErrorCode status = UErrorCode();
+ if (intl_charFromString(key, &key_char, &key_len,
+ &status) == SUCCESS) {
+ spprintf(&message, 0, "The argument for key '%s' "
+ "cannot be used as a date or time", key_char);
+ intl_errors_set(&err, err.code, message, 1 TSRMLS_CC);
+ efree(key_char);
+ efree(message);
+ }
continue;
}
- formattable.setDate(dd);
- break;
- }
- }
- } else {
- /* We couldn't find any information about the argument in the pattern, this
- * means it's an extra argument. So convert it to a number if it's a number or
- * bool or null and to a string if it's anything else. */
- switch (Z_TYPE_PP(elem)) {
- case IS_DOUBLE:
- formattable.setDouble(Z_DVAL_PP(elem));
- break;
- case IS_BOOL:
- convert_to_long_ex(elem);
- /* Intentional fallthrough */
- case IS_LONG:
- formattable.setInt64((int64_t)Z_LVAL_PP(elem));
- break;
- case IS_NULL:
- formattable.setInt64((int64_t)0);
- break;
- default:
- goto string_arg;
- }
+ formattable.setDate(dd);
+ break;
+ }
+ }
+ } else {
+ /* We couldn't find any information about the argument in the pattern, this
+ * means it's an extra argument. So convert it to a number if it's a number or
+ * bool or null and to a string if it's anything else except arrays . */
+ switch (Z_TYPE_PP(elem)) {
+ case IS_DOUBLE:
+ formattable.setDouble(Z_DVAL_PP(elem));
+ break;
+ case IS_BOOL:
+ convert_to_long_ex(elem);
+ /* Intentional fallthrough */
+ case IS_LONG:
+ formattable.setInt64((int64_t)Z_LVAL_PP(elem));
+ break;
+ case IS_NULL:
+ formattable.setInt64((int64_t)0);
+ break;
+ case IS_STRING:
+ case IS_OBJECT:
+ goto string_arg;
+ default:
+ {
+ char *message, *key_char;
+ int key_len;
+ UErrorCode status = UErrorCode();
+ if (intl_charFromString(key, &key_char, &key_len,
+ &status) == SUCCESS) {
+ spprintf(&message, 0, "No strategy to convert the "
+ "value given for the argument with key '%s' "
+ "is available", key_char);
+ intl_errors_set(&err,
+ U_ILLEGAL_ARGUMENT_ERROR, message, 1 TSRMLS_CC);
+ efree(key_char);
+ efree(message);
+ }
+ }
+ }
}
- } // visiting each argument
+ } // visiting each argument
- if (U_FAILURE(*status)){
- return;
- }
+ if (U_FAILURE(err.code)) {
+ return;
+ }
UnicodeString resultStr;
FieldPosition fieldPosition(0);
- /* format the message */
+ /* format the message */
mf->format(farg_names.empty() ? NULL : &farg_names[0],
- fargs.empty() ? NULL : &fargs[0], arg_count, resultStr, *status);
+ fargs.empty() ? NULL : &fargs[0], arg_count, resultStr, err.code);
- if (U_FAILURE(*status)) {
- return;
- }
+ if (U_FAILURE(err.code)) {
+ intl_errors_set(&err, err.code,
+ "Call to ICU MessageFormat::format() has failed", 0 TSRMLS_CC);
+ return;
+ }
*formatted_len = resultStr.length();
*formatted = eumalloc(*formatted_len+1);
- resultStr.extract(*formatted, *formatted_len+1, *status);
+ resultStr.extract(*formatted, *formatted_len+1, err.code);
+ if (U_FAILURE(err.code)) {
+ intl_errors_set(&err, err.code,
+ "Error copying format() result", 0 TSRMLS_CC);
+ return;
+ }
}
#define cleanup_zvals() for(int j=i;j>=0;j--) { zval_ptr_dtor((*args)+i); }