diff options
Diffstat (limited to 'deps/v8/src/objects/js-collator.cc')
-rw-r--r-- | deps/v8/src/objects/js-collator.cc | 541 |
1 files changed, 541 insertions, 0 deletions
diff --git a/deps/v8/src/objects/js-collator.cc b/deps/v8/src/objects/js-collator.cc new file mode 100644 index 0000000000..c6cbecfb01 --- /dev/null +++ b/deps/v8/src/objects/js-collator.cc @@ -0,0 +1,541 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INTL_SUPPORT +#error Internationalization is expected to be enabled. +#endif // V8_INTL_SUPPORT + +#include "src/objects/js-collator.h" + +#include "src/isolate.h" +#include "src/objects-inl.h" +#include "src/objects/js-collator-inl.h" +#include "unicode/coll.h" +#include "unicode/locid.h" +#include "unicode/strenum.h" +#include "unicode/ucol.h" +#include "unicode/uloc.h" + +namespace v8 { +namespace internal { + +namespace { + +// TODO(gsathya): Consider internalizing the value strings. +void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options, + Handle<String> key, const char* value) { + CHECK_NOT_NULL(value); + Handle<String> value_str = + isolate->factory()->NewStringFromAsciiChecked(value); + + // This is a brand new JSObject that shouldn't already have the same + // key so this shouldn't fail. + CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_str, + kDontThrow) + .FromJust()); +} + +void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options, + Handle<String> key, bool value) { + Handle<Object> value_obj = isolate->factory()->ToBoolean(value); + + // This is a brand new JSObject that shouldn't already have the same + // key so this shouldn't fail. + CHECK(JSReceiver::CreateDataProperty(isolate, options, key, value_obj, + kDontThrow) + .FromJust()); +} + +} // anonymous namespace + +// static +Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate, + Handle<JSCollator> collator) { + Handle<JSObject> options = + isolate->factory()->NewJSObject(isolate->object_function()); + + JSCollator::Usage usage = collator->usage(); + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->usage_string(), + JSCollator::UsageToString(usage)); + + icu::Collator* icu_collator = collator->icu_collator()->raw(); + CHECK_NOT_NULL(icu_collator); + + UErrorCode status = U_ZERO_ERROR; + bool numeric = + icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON; + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->numeric_string(), numeric); + + const char* case_first = nullptr; + status = U_ZERO_ERROR; + switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) { + case UCOL_LOWER_FIRST: + case_first = "lower"; + break; + case UCOL_UPPER_FIRST: + case_first = "upper"; + break; + default: + case_first = "false"; + } + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->caseFirst_string(), case_first); + + const char* sensitivity = nullptr; + status = U_ZERO_ERROR; + switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) { + case UCOL_PRIMARY: { + CHECK(U_SUCCESS(status)); + status = U_ZERO_ERROR; + // case level: true + s1 -> case, s1 -> base. + if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) { + sensitivity = "case"; + } else { + sensitivity = "base"; + } + CHECK(U_SUCCESS(status)); + break; + } + case UCOL_SECONDARY: + sensitivity = "accent"; + break; + case UCOL_TERTIARY: + sensitivity = "variant"; + break; + case UCOL_QUATERNARY: + // We shouldn't get quaternary and identical from ICU, but if we do + // put them into variant. + sensitivity = "variant"; + break; + default: + sensitivity = "variant"; + } + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->sensitivity_string(), sensitivity); + + status = U_ZERO_ERROR; + bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING, + status) == UCOL_SHIFTED; + CHECK(U_SUCCESS(status)); + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->ignorePunctuation_string(), + ignore_punctuation); + + status = U_ZERO_ERROR; + const char* collation; + std::unique_ptr<icu::StringEnumeration> collation_values( + icu_collator->getKeywordValues("co", status)); + // Collation wasn't provided as a keyword to icu, use default. + if (status == U_ILLEGAL_ARGUMENT_ERROR) { + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->collation_string(), "default"); + } else { + CHECK(U_SUCCESS(status)); + CHECK_NOT_NULL(collation_values.get()); + + int32_t length; + status = U_ZERO_ERROR; + collation = collation_values->next(&length, status); + CHECK(U_SUCCESS(status)); + + // There has to be at least one value. + CHECK_NOT_NULL(collation); + CreateDataPropertyForOptions( + isolate, options, isolate->factory()->collation_string(), collation); + + status = U_ZERO_ERROR; + collation_values->reset(status); + CHECK(U_SUCCESS(status)); + } + + status = U_ZERO_ERROR; + icu::Locale icu_locale = icu_collator->getLocale(ULOC_VALID_LOCALE, status); + CHECK(U_SUCCESS(status)); + + char result[ULOC_FULLNAME_CAPACITY]; + status = U_ZERO_ERROR; + uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY, + FALSE, &status); + CHECK(U_SUCCESS(status)); + + CreateDataPropertyForOptions(isolate, options, + isolate->factory()->locale_string(), result); + + return options; +} + +namespace { + +std::map<std::string, std::string> LookupUnicodeExtensions( + const icu::Locale& icu_locale, const std::set<std::string>& relevant_keys) { + std::map<std::string, std::string> extensions; + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::StringEnumeration> keywords( + icu_locale.createKeywords(status)); + if (U_FAILURE(status)) return extensions; + + if (!keywords) return extensions; + char value[ULOC_FULLNAME_CAPACITY]; + + int32_t length; + status = U_ZERO_ERROR; + for (const char* keyword = keywords->next(&length, status); + keyword != nullptr; keyword = keywords->next(&length, status)) { + // Ignore failures in ICU and skip to the next keyword. + // + // This is fine.™ + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + + icu_locale.getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status); + + // Ignore failures in ICU and skip to the next keyword. + // + // This is fine.™ + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + + const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword); + + // Ignore keywords that we don't recognize - spec allows that. + if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) { + const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value); + extensions.insert( + std::pair<std::string, std::string>(bcp47_key, bcp47_value)); + } + } + + return extensions; +} + +void SetCaseFirstOption(icu::Collator* icu_collator, const char* value) { + CHECK_NOT_NULL(icu_collator); + CHECK_NOT_NULL(value); + UErrorCode status = U_ZERO_ERROR; + if (strcmp(value, "upper") == 0) { + icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); + } else if (strcmp(value, "lower") == 0) { + icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); + } else { + icu_collator->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status); + } + CHECK(U_SUCCESS(status)); +} + +} // anonymous namespace + +// static +MaybeHandle<JSCollator> JSCollator::InitializeCollator( + Isolate* isolate, Handle<JSCollator> collator, Handle<Object> locales, + Handle<Object> options_obj) { + // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). + Handle<JSObject> requested_locales; + ASSIGN_RETURN_ON_EXCEPTION(isolate, requested_locales, + Intl::CanonicalizeLocaleListJS(isolate, locales), + JSCollator); + + // 2. If options is undefined, then + if (options_obj->IsUndefined(isolate)) { + // 2. a. Let options be ObjectCreate(null). + options_obj = isolate->factory()->NewJSObjectWithNullProto(); + } else { + // 3. Else + // 3. a. Let options be ? ToObject(options). + ASSIGN_RETURN_ON_EXCEPTION( + isolate, options_obj, + Object::ToObject(isolate, options_obj, "Intl.Collator"), JSCollator); + } + + // At this point, options_obj can either be a JSObject or a JSProxy only. + Handle<JSReceiver> options = Handle<JSReceiver>::cast(options_obj); + + // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", + // "search" », "sort"). + std::vector<const char*> values = {"sort", "search"}; + std::unique_ptr<char[]> usage_str = nullptr; + JSCollator::Usage usage = JSCollator::Usage::SORT; + Maybe<bool> found_usage = Intl::GetStringOption( + isolate, options, "usage", values, "Intl.Collator", &usage_str); + MAYBE_RETURN(found_usage, MaybeHandle<JSCollator>()); + + if (found_usage.FromJust()) { + DCHECK_NOT_NULL(usage_str.get()); + if (strcmp(usage_str.get(), "search") == 0) { + usage = JSCollator::Usage::SEARCH; + } + } + + // 5. Set collator.[[Usage]] to usage. + collator->set_usage(usage); + + // 6. If usage is "sort", then + // a. Let localeData be %Collator%.[[SortLocaleData]]. + // 7. Else, + // a. Let localeData be %Collator%.[[SearchLocaleData]]. + // + // The above two spec operations aren't required, the Intl spec is + // crazy. See https://github.com/tc39/ecma402/issues/256 + + // TODO(gsathya): This is currently done as part of the + // Intl::ResolveLocale call below. Fix this once resolveLocale is + // changed to not do the lookup. + // + // 9. Let matcher be ? GetOption(options, "localeMatcher", "string", + // « "lookup", "best fit" », "best fit"). + // 10. Set opt.[[localeMatcher]] to matcher. + + // 11. Let numeric be ? GetOption(options, "numeric", "boolean", + // undefined, undefined). + // 12. If numeric is not undefined, then + // a. Let numeric be ! ToString(numeric). + // + // Note: We omit the ToString(numeric) operation as it's not + // observable. Intl::GetBoolOption returns a Boolean and + // ToString(Boolean) is not side-effecting. + // + // 13. Set opt.[[kn]] to numeric. + bool numeric; + Maybe<bool> found_numeric = Intl::GetBoolOption(isolate, options, "numeric", + "Intl.Collator", &numeric); + MAYBE_RETURN(found_numeric, MaybeHandle<JSCollator>()); + + // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", + // « "upper", "lower", "false" », undefined). + // 15. Set opt.[[kf]] to caseFirst. + values = {"upper", "lower", "false"}; + std::unique_ptr<char[]> case_first_str = nullptr; + Maybe<bool> found_case_first = Intl::GetStringOption( + isolate, options, "caseFirst", values, "Intl.Collator", &case_first_str); + MAYBE_RETURN(found_case_first, MaybeHandle<JSCollator>()); + + // The relevant unicode extensions accepted by Collator as specified here: + // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots + // + // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]]. + std::set<std::string> relevant_extension_keys{"co", "kn", "kf"}; + + // We don't pass the relevant_extension_keys to ResolveLocale here + // as per the spec. + // + // In ResolveLocale, the spec makes sure we only pick and use the + // relevant extension keys and ignore any other keys. Also, in + // ResolveLocale, the spec makes sure that if a given key has both a + // value in the options object and an unicode extension value, then + // we pick the value provided in the options object. + // For example: in the case of `new Intl.Collator('en-u-kn-true', { + // numeric: false })` the value `false` is used for the `numeric` + // key. + // + // Instead of performing all this validation in ResolveLocale, we + // just perform it inline below. In the future when we port + // ResolveLocale to C++, we can make all these validations generic + // and move it ResolveLocale. + // + // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], + // requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]], + // localeData). + // 18. Set collator.[[Locale]] to r.[[locale]]. + Handle<JSObject> r; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, r, + Intl::ResolveLocale(isolate, "collator", requested_locales, options), + JSCollator); + + Handle<String> locale_with_extension_str = + isolate->factory()->NewStringFromStaticChars("localeWithExtension"); + Handle<Object> locale_with_extension_obj = + JSObject::GetDataProperty(r, locale_with_extension_str); + + // The locale_with_extension has to be a string. Either a user + // provided canonicalized string or the default locale. + CHECK(locale_with_extension_obj->IsString()); + Handle<String> locale_with_extension = + Handle<String>::cast(locale_with_extension_obj); + + icu::Locale icu_locale = + Intl::CreateICULocale(isolate, locale_with_extension); + DCHECK(!icu_locale.isBogus()); + + std::map<std::string, std::string> extensions = + LookupUnicodeExtensions(icu_locale, relevant_extension_keys); + + // 19. Let collation be r.[[co]]. + // + // r.[[co]] is already set as part of the icu::Locale creation as + // icu parses unicode extensions and sets the keywords. + // + // We need to sanitize the keywords based on certain ECMAScript rules. + // + // As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots: + // The values "standard" and "search" must not be used as elements + // in any [[SortLocaleData]][locale].co and + // [[SearchLocaleData]][locale].co list. + auto co_extension_it = extensions.find("co"); + if (co_extension_it != extensions.end()) { + const std::string& value = co_extension_it->second; + if ((value == "search") || (value == "standard")) { + UErrorCode status = U_ZERO_ERROR; + icu_locale.setKeywordValue("co", NULL, status); + CHECK(U_SUCCESS(status)); + } + } + + // 20. If collation is null, let collation be "default". + // 21. Set collator.[[Collation]] to collation. + // + // We don't store the collation value as per the above two steps + // here. The collation value can be looked up from icu::Collator on + // demand, as part of Intl.Collator.prototype.resolvedOptions. + + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr<icu::Collator> icu_collator( + icu::Collator::createInstance(icu_locale, status)); + if (U_FAILURE(status) || icu_collator.get() == nullptr) { + status = U_ZERO_ERROR; + // Remove extensions and try again. + icu::Locale no_extension_locale(icu_locale.getBaseName()); + icu_collator.reset( + icu::Collator::createInstance(no_extension_locale, status)); + + if (U_FAILURE(status) || icu_collator.get() == nullptr) { + FATAL("Failed to create ICU collator, are ICU data files missing?"); + } + } + DCHECK(U_SUCCESS(status)); + CHECK_NOT_NULL(icu_collator.get()); + + // 22. If relevantExtensionKeys contains "kn", then + // a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true"). + // + // If the numeric value is passed in through the options object, + // then we use it. Otherwise, we check if the numeric value is + // passed in through the unicode extensions. + status = U_ZERO_ERROR; + if (found_numeric.FromJust()) { + icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, + numeric ? UCOL_ON : UCOL_OFF, status); + CHECK(U_SUCCESS(status)); + } else { + auto kn_extension_it = extensions.find("kn"); + if (kn_extension_it != extensions.end()) { + const std::string& value = kn_extension_it->second; + + numeric = (value == "true"); + + icu_collator->setAttribute(UCOL_NUMERIC_COLLATION, + numeric ? UCOL_ON : UCOL_OFF, status); + CHECK(U_SUCCESS(status)); + } + } + + // 23. If relevantExtensionKeys contains "kf", then + // a. Set collator.[[CaseFirst]] to r.[[kf]]. + // + // If the caseFirst value is passed in through the options object, + // then we use it. Otherwise, we check if the caseFirst value is + // passed in through the unicode extensions. + if (found_case_first.FromJust()) { + const char* case_first_cstr = case_first_str.get(); + SetCaseFirstOption(icu_collator.get(), case_first_cstr); + } else { + auto kf_extension_it = extensions.find("kf"); + if (kf_extension_it != extensions.end()) { + const std::string& value = kf_extension_it->second; + SetCaseFirstOption(icu_collator.get(), value.c_str()); + } + } + + // Normalization is always on, by the spec. We are free to optimize + // if the strings are already normalized (but we don't have a way to tell + // that right now). + status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); + CHECK(U_SUCCESS(status)); + + // 24. Let sensitivity be ? GetOption(options, "sensitivity", + // "string", « "base", "accent", "case", "variant" », undefined). + values = {"base", "accent", "case", "variant"}; + std::unique_ptr<char[]> sensitivity_str = nullptr; + Maybe<bool> found_sensitivity = + Intl::GetStringOption(isolate, options, "sensitivity", values, + "Intl.Collator", &sensitivity_str); + MAYBE_RETURN(found_sensitivity, MaybeHandle<JSCollator>()); + + // 25. If sensitivity is undefined, then + if (!found_sensitivity.FromJust()) { + // 25. a. If usage is "sort", then + if (usage == Usage::SORT) { + // 25. a. i. Let sensitivity be "variant". + // 26. Set collator.[[Sensitivity]] to sensitivity. + icu_collator->setStrength(icu::Collator::TERTIARY); + } + } else { + DCHECK(found_sensitivity.FromJust()); + const char* sensitivity_cstr = sensitivity_str.get(); + DCHECK_NOT_NULL(sensitivity_cstr); + + // 26. Set collator.[[Sensitivity]] to sensitivity. + if (strcmp(sensitivity_cstr, "base") == 0) { + icu_collator->setStrength(icu::Collator::PRIMARY); + } else if (strcmp(sensitivity_cstr, "accent") == 0) { + icu_collator->setStrength(icu::Collator::SECONDARY); + } else if (strcmp(sensitivity_cstr, "case") == 0) { + icu_collator->setStrength(icu::Collator::PRIMARY); + status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status); + CHECK(U_SUCCESS(status)); + } else { + DCHECK_EQ(0, strcmp(sensitivity_cstr, "variant")); + icu_collator->setStrength(icu::Collator::TERTIARY); + } + } + + // 27.Let ignorePunctuation be ? GetOption(options, + // "ignorePunctuation", "boolean", undefined, false). + bool ignore_punctuation; + Maybe<bool> found_ignore_punctuation = + Intl::GetBoolOption(isolate, options, "ignorePunctuation", + "Intl.Collator", &ignore_punctuation); + MAYBE_RETURN(found_ignore_punctuation, MaybeHandle<JSCollator>()); + + // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation. + if (found_ignore_punctuation.FromJust() && ignore_punctuation) { + status = U_ZERO_ERROR; + icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); + CHECK(U_SUCCESS(status)); + } + + Handle<Managed<icu::Collator>> managed_collator = + Managed<icu::Collator>::FromUniquePtr(isolate, 0, + std::move(icu_collator)); + collator->set_icu_collator(*managed_collator); + + // 29. Return collator. + return collator; +} + +// static +const char* JSCollator::UsageToString(Usage usage) { + switch (usage) { + case Usage::SORT: + return "sort"; + case Usage::SEARCH: + return "search"; + case Usage::COUNT: + UNREACHABLE(); + } +} + +} // namespace internal +} // namespace v8 |