diff options
author | Gustavo André dos Santos Lopes <cataphract@php.net> | 2012-05-05 12:07:19 +0200 |
---|---|---|
committer | Gustavo André dos Santos Lopes <cataphract@php.net> | 2012-05-13 20:51:43 +0100 |
commit | c9b913b56bda2740de346ce508383d9e8a99883f (patch) | |
tree | d057008453a9f5c5a81ab73473e1c73132f53d6d /ext/intl/msgformat/msgformat_helpers.cpp | |
parent | 6b16f7cd690ecd9a32723c69d90888401c83914b (diff) | |
download | php-git-c9b913b56bda2740de346ce508383d9e8a99883f.tar.gz |
Orig patch for FR #61871 by David Sklar
Diffstat (limited to 'ext/intl/msgformat/msgformat_helpers.cpp')
-rwxr-xr-x | ext/intl/msgformat/msgformat_helpers.cpp | 346 |
1 files changed, 283 insertions, 63 deletions
diff --git a/ext/intl/msgformat/msgformat_helpers.cpp b/ext/intl/msgformat/msgformat_helpers.cpp index 508bdb6658..67d3558590 100755 --- a/ext/intl/msgformat/msgformat_helpers.cpp +++ b/ext/intl/msgformat/msgformat_helpers.cpp @@ -24,6 +24,9 @@ #include <math.h> #include <unicode/msgfmt.h> #include <unicode/chariter.h> +#include <unicode/messagepattern.h> + +#include <map> extern "C" { #include "php_intl.h" @@ -31,6 +34,9 @@ extern "C" { #include "msgformat_format.h" #include "msgformat_helpers.h" #include "intl_convert.h" +/* avoid redefinition of int8_t, already defined in unicode/pwin32.h */ +#define _MSC_STDINT_H_ 1 +#include "ext/date/php_date.h" } U_NAMESPACE_BEGIN @@ -43,87 +49,301 @@ class MessageFormatAdapter { public: static const Formattable::Type* getArgTypeList(const MessageFormat& m, int32_t& count); + static const MessagePattern getMessagePattern(MessageFormat* m); }; const Formattable::Type* MessageFormatAdapter::getArgTypeList(const MessageFormat& m, int32_t& count) { return m.getArgTypeList(count); } +const MessagePattern +MessageFormatAdapter::getMessagePattern(MessageFormat* m) { + return m->msgPattern; +} U_NAMESPACE_END -U_CFUNC int32_t umsg_format_arg_count(UMessageFormat *fmt) +U_CFUNC int32_t umsg_format_arg_count(UMessageFormat *fmt) { int32_t fmt_count = 0; MessageFormatAdapter::getArgTypeList(*(const MessageFormat*)fmt, fmt_count); return fmt_count; } -U_CFUNC void umsg_format_helper(UMessageFormat *fmt, int arg_count, zval **args, UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC) +double umsg_helper_zval_to_millis(zval *z, UErrorCode *status TSRMLS_DC) { + double rv = 0.0; + if (Z_TYPE_P(z) == IS_DOUBLE) { + rv = U_MILLIS_PER_SECOND * Z_DVAL_P(z); + } + else if (Z_TYPE_P(z) == IS_LONG) { + rv = U_MILLIS_PER_SECOND * (double) Z_LVAL_P(z); + } + else if (Z_TYPE_P(z) == IS_OBJECT) { + /* Borrowed from datefmt_format() in intl/dateformat/dateformat_format.c */ + if (instanceof_function(Z_OBJCE_P(z), php_date_get_date_ce() TSRMLS_CC)) { + zval retval; + zval *zfuncname; + INIT_ZVAL(retval); + MAKE_STD_ZVAL(zfuncname); + ZVAL_STRING(zfuncname, "getTimestamp", 1); + if (call_user_function(NULL, &(z), zfuncname, &retval, 0, NULL TSRMLS_CC) != SUCCESS || Z_TYPE(retval) != IS_LONG) { + *status = U_RESOURCE_TYPE_MISMATCH; + } else { + rv = U_MILLIS_PER_SECOND * (double) Z_LVAL(retval); + } + zval_ptr_dtor(&zfuncname); + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + } + return rv; +} + +U_CFUNC void umsg_format_helper(UMessageFormat *fmt, int arg_count, zval **args, char **arg_names, UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC) { - int fmt_count = 0; - const Formattable::Type* argTypes = - MessageFormatAdapter::getArgTypeList(*(const MessageFormat*)fmt, fmt_count); - Formattable* fargs = new Formattable[fmt_count ? fmt_count : 1]; - - for(int32_t i = 0; i < fmt_count; ++i) { - UChar *stringVal = NULL; - int stringLen = 0; - int64_t tInt64 = 0; - - switch(argTypes[i]) { - case Formattable::kDate: - convert_to_long_ex(&args[i]); - fargs[i].setDate(U_MILLIS_PER_SECOND * (double)Z_LVAL_P(args[i])); - break; - - case Formattable::kDouble: - convert_to_double_ex(&args[i]); - fargs[i].setDouble(Z_DVAL_P(args[i])); - break; - - case Formattable::kLong: - convert_to_long_ex(&args[i]); - fargs[i].setLong(Z_LVAL_P(args[i])); - break; - - case Formattable::kInt64: - if(Z_TYPE_P(args[i]) == IS_DOUBLE) { - tInt64 = (int64_t)Z_DVAL_P(args[i]); - } else if(Z_TYPE_P(args[i]) == IS_LONG) { - tInt64 = (int64_t)Z_LVAL_P(args[i]); - } else { - SEPARATE_ZVAL_IF_NOT_REF(&args[i]); - convert_scalar_to_number( args[i] TSRMLS_CC ); - tInt64 = (Z_TYPE_P(args[i]) == IS_DOUBLE)?(int64_t)Z_DVAL_P(args[i]):Z_LVAL_P(args[i]); - } - fargs[i].setInt64(tInt64); - break; - - case Formattable::kString: - convert_to_string_ex(&args[i]); - intl_convert_utf8_to_utf16(&stringVal, &stringLen, Z_STRVAL_P(args[i]), Z_STRLEN_P(args[i]), status); - if(U_FAILURE(*status)){ - delete[] fargs; - return; - } - fargs[i].setString(stringVal); - efree(stringVal); - break; - - case Formattable::kArray: - case Formattable::kObject: - *status = U_UNSUPPORTED_ERROR; - delete[] fargs; - return; - } - } + int fmt_count; + int32_t i; + Formattable* fargs; + UnicodeString *farg_names; + MessageFormat *mf = (MessageFormat *) fmt; + MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf); + std::map<UnicodeString, Formattable::Type> argTypesNamed; + std::map<int32_t, Formattable::Type> argTypesNumbered; + + int32_t usingNamedArguments = mf->usesNamedArguments(); + + /* + looking through the pattern, go to each arg_start part type. + the arg-typeof that tells us the argument type (simple, complicated) + then the next part is either the arg_name or arg number + and then if it's simple after that there could be a part-type=arg-type whise substring will tell us number, spellout, etc + if the next thing isn't an arg-type then assume string + same name that appears more than once in a complicated pattern will appear more than once, we could + -- ignore subsequent occurances + -- complain if types differ? + */ + + int32_t parts_count = mp.countParts(); + + for (i = 0; i < parts_count; i++) { + MessagePattern::Part p = mp.getPart(i); + if (p.getType() == UMSGPAT_PART_TYPE_ARG_START) { + MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */ + UnicodeString argName; + int32_t argNumber; + if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { + argName = mp.getSubstring(name_part); + } + else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { + argNumber = name_part.getValue(); + } + /* If we haven't seen this arg name before */ + int seenBefore = usingNamedArguments ? argTypesNamed.count(argName) : argTypesNumbered.count(argNumber); + if (0 == seenBefore) { + Formattable::Type fargType; + UMessagePatternArgType argType = p.getArgType(); + /* No type specified, treat it as a string */ + if (argType == UMSGPAT_ARG_TYPE_NONE) { + fargType = Formattable::kString; + } + /* Some type was specified, might be simple or complicated */ + else { + if (argType == UMSGPAT_ARG_TYPE_SIMPLE) { + /* For a SIMPLE arg, after the name part, there should be + * an ARG_TYPE part whose string value tells us what to do */ + MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */ + if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) { + UnicodeString typeString = mp.getSubstring(type_part); + /* This is all based on the rules in the docs for MessageFormat + * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */ + if (typeString == "number") { + MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */ + if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { + UnicodeString styleString = mp.getSubstring(style_part); + if (styleString == "integer") { + fargType = Formattable::kInt64; + } + else if (styleString == "currency") { + fargType = Formattable::kDouble; + } + else if (styleString == "percent") { + fargType = Formattable::kDouble; + } + } + // if missing style, part, make it a double + else { + fargType = Formattable::kDouble; + } + } + else if ((typeString == "date") || (typeString == "time")) { + fargType = Formattable::kDate; + } + else if ((typeString == "spellout") || (typeString == "ordinal") || (typeString == "duration")) { + fargType = Formattable::kDouble; + } + + } + else { + /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a + * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern + * is broken. */ + *status = U_PARSE_ERROR; + return; + } + } + else if (argType == UMSGPAT_ARG_TYPE_PLURAL) { + fargType = Formattable::kDouble; + } + else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { + fargType = Formattable::kDouble; + } + else if (argType == UMSGPAT_ARG_TYPE_SELECT) { + fargType = Formattable::kString; + } + else { + fargType = Formattable::kString; + } + } /* was type specified? */ + if (usingNamedArguments) { + argTypesNamed.insert(std::pair<UnicodeString, Formattable::Type>(argName, fargType)); + } else { + argTypesNumbered.insert(std::pair<int32_t, Formattable::Type>(argNumber, fargType)); + } + } /* Haven't seen arg before? */ + } /* checking for ARG_START */ + } /* visiting each part */ + +#define CLEANUP_AND_RETURN_ON_ERROR(status) do { \ + if (U_FAILURE(*status)) { \ + delete[] fargs; \ + if (usingNamedArguments) { \ + delete[] farg_names; \ + } \ + return; \ + } \ + } while (0) + + + fmt_count = arg_count; + fargs = new Formattable[fmt_count]; + if (usingNamedArguments) { + farg_names = new UnicodeString[fmt_count]; + } + for (int32_t i = 0; i < fmt_count; ++i) { + UChar* text = NULL; + int textLen = 0; + int found = 0; + Formattable::Type argType; + + if (usingNamedArguments) { + intl_convert_utf8_to_utf16(&text, &textLen, arg_names[i], strlen(arg_names[i]), status); + CLEANUP_AND_RETURN_ON_ERROR(status); + farg_names[i].setTo(text, textLen); + efree(text); + text = NULL; textLen = 0; + std::map<UnicodeString, Formattable::Type>::iterator it; + it = argTypesNamed.find(farg_names[i]); + if (it != argTypesNamed.end()) { + argType = it->second; + found = 1; + } + } + else { + std::map<int32_t, Formattable::Type>::iterator it; + it = argTypesNumbered.find(i); + if (it != argTypesNumbered.end()) { + argType = it->second; + found = 1; + } + } + if (found) { + switch (argType) { + case Formattable::kString: + /* This implicitly converts objects by attempting to call __toString() */ + convert_to_string_ex(&args[i]); + intl_convert_utf8_to_utf16(&text, &textLen, Z_STRVAL_P(args[i]), Z_STRLEN_P(args[i]), status); + CLEANUP_AND_RETURN_ON_ERROR(status); + fargs[i].setString(text); + efree(text); + text = NULL; textLen = 0; + break; + case Formattable::kDouble: + { + double d; + if(Z_TYPE_P(args[i]) == IS_DOUBLE) { + d = Z_DVAL_P(args[i]); + } else if(Z_TYPE_P(args[i]) == IS_LONG) { + d = (double)Z_LVAL_P(args[i]); + } else { + SEPARATE_ZVAL_IF_NOT_REF(&args[i]); + convert_scalar_to_number( args[i] TSRMLS_CC ); + d = (Z_TYPE_P(args[i]) == IS_DOUBLE)?Z_DVAL_P(args[i]):(double)Z_LVAL_P(args[i]); + } + fargs[i].setDouble(d); + break; + } + case Formattable::kInt64: + { + int64_t tInt64; + if(Z_TYPE_P(args[i]) == IS_DOUBLE) { + tInt64 = (int64_t)Z_DVAL_P(args[i]); + } else if(Z_TYPE_P(args[i]) == IS_LONG) { + tInt64 = (int64_t)Z_LVAL_P(args[i]); + } else { + SEPARATE_ZVAL_IF_NOT_REF(&args[i]); + convert_scalar_to_number( args[i] TSRMLS_CC ); + tInt64 = (Z_TYPE_P(args[i]) == IS_DOUBLE)?(int64_t)Z_DVAL_P(args[i]):Z_LVAL_P(args[i]); + } + fargs[i].setInt64(tInt64); + break; + } + case Formattable::kDate: + { + double dd = umsg_helper_zval_to_millis(args[i], status TSRMLS_CC); + CLEANUP_AND_RETURN_ON_ERROR(status); + fargs[i].setDate(dd); + break; + } + } + } + else { + /* We couldn't find any information about the argument in the pattern, this + * means it's an extra argument. So convert it to a number if it's a number or + * bool or null and to a string if it's anything else. */ + switch (Z_TYPE_P(args[i])) { + case IS_DOUBLE: + fargs[i].setDouble(Z_DVAL_P(args[i])); + break; + case IS_BOOL: + convert_to_long_ex(&args[i]); + /* Intentional fallthrough */ + case IS_LONG: + fargs[i].setInt64((int64_t) Z_LVAL_P(args[i])); + break; + case IS_NULL: + fargs[i].setInt64((int64_t) 0); + break; + default: + convert_to_string_ex(&args[i]); + intl_convert_utf8_to_utf16(&text, &textLen, Z_STRVAL_P(args[i]), Z_STRLEN_P(args[i]), status); + CLEANUP_AND_RETURN_ON_ERROR(status); + fargs[i].setString(text); + efree(text); + text = NULL; textLen = 0; + break; + } + } + } // visiting each argument argument UnicodeString resultStr; FieldPosition fieldPosition(0); - - /* format the message */ - ((const MessageFormat*)fmt)->format(fargs, fmt_count, resultStr, fieldPosition, *status); + /* format the message */ + if (usingNamedArguments) { + mf->format(farg_names, fargs, fmt_count, resultStr, *status); + delete[] farg_names; + } else { + mf->format(fargs, fmt_count, resultStr, fieldPosition, *status); + } delete[] fargs; if(U_FAILURE(*status)){ @@ -157,7 +377,7 @@ U_CFUNC void umsg_parse_helper(UMessageFormat *fmt, int *count, zval ***args, UC int stmp_len; ALLOC_INIT_ZVAL((*args)[i]); - + switch(fargs[i].getType()) { case Formattable::kDate: aDate = ((double)fargs[i].getDate())/U_MILLIS_PER_SECOND; |