diff options
author | Steven R. Loomis <srloomis@us.ibm.com> | 2018-03-26 15:29:02 -0700 |
---|---|---|
committer | Steven R. Loomis <srloomis@us.ibm.com> | 2018-04-02 18:18:28 -0700 |
commit | 64211405dab824a570e52d000891c49415cc42b8 (patch) | |
tree | 4c196d0e2c19e083db1e124139dd4ba6272fd049 /deps/icu-small | |
parent | 88773af540a36b23a47af0d6c4ce03b8cc3ef9aa (diff) | |
download | node-new-64211405dab824a570e52d000891c49415cc42b8.tar.gz |
deps: ICU 61.1 bump
- Update to released ICU 61.1, including:
- CLDR 33 (many new languages and data improvements)
- Many small API additions, improvements, and bug fixes
- note: 'icu::' namespace is no longer used by default
(Necessitated https://github.com/nodejs/node/pull/18667 )
PR-URL: https://github.com/nodejs/node/pull/19621
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Diffstat (limited to 'deps/icu-small')
178 files changed, 8926 insertions, 2007 deletions
diff --git a/deps/icu-small/LICENSE b/deps/icu-small/LICENSE index c84076cd07..25b6eb9d34 100644 --- a/deps/icu-small/LICENSE +++ b/deps/icu-small/LICENSE @@ -1,7 +1,7 @@ COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) -Copyright © 1991-2017 Unicode, Inc. All rights reserved. -Distributed under the Terms of Use in http://www.unicode.org/copyright.html +Copyright © 1991-2018 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in http://www.unicode.org/copyright.html. Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and any associated documentation @@ -383,3 +383,32 @@ Database section 7. # by ICANN or the IETF Trust on the database or the code. Any person # making a contribution to the database or code waives all rights to # future claims in that contribution or in the TZ Database. + +6. Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/deps/icu-small/README-SMALL-ICU.txt b/deps/icu-small/README-SMALL-ICU.txt index c6dc0b3051..b3919ec52b 100644 --- a/deps/icu-small/README-SMALL-ICU.txt +++ b/deps/icu-small/README-SMALL-ICU.txt @@ -1,8 +1,8 @@ Small ICU sources - auto generated by shrink-icu-src.py This directory contains the ICU subset used by --with-intl=small-icu (the default) -It is a strict subset of ICU 60 source files with the following exception(s): -* deps/icu-small/source/data/in/icudt60l.dat : Reduced-size data file +It is a strict subset of ICU 61 source files with the following exception(s): +* deps/icu-small/source/data/in/icudt61l.dat : Reduced-size data file To rebuild this directory, see ../../tools/icu/README.md diff --git a/deps/icu-small/source/common/bmpset.cpp b/deps/icu-small/source/common/bmpset.cpp index f84bfd7f5b..35bc80dce3 100644 --- a/deps/icu-small/source/common/bmpset.cpp +++ b/deps/icu-small/source/common/bmpset.cpp @@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) { ++lead; } if(lead<limitLead) { - bits=~((1<<lead)-1); + bits=~(((unsigned)1<<lead)-1); if(limitLead<0x20) { - bits&=(1<<limitLead)-1; + bits&=((unsigned)1<<limitLead)-1; } for(trail=0; trail<64; ++trail) { table[trail]|=bits; diff --git a/deps/icu-small/source/common/brkeng.cpp b/deps/icu-small/source/common/brkeng.cpp index da64b3bdef..a513bafb16 100644 --- a/deps/icu-small/source/common/brkeng.cpp +++ 
b/deps/icu-small/source/common/brkeng.cpp @@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() { ****************************************************************** */ -UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { - for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) { - fHandled[i] = 0; - } +UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) { + (void)status; } UnhandledEngine::~UnhandledEngine() { - for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) { - if (fHandled[i] != 0) { - delete fHandled[i]; - } - } + delete fHandled; + fHandled = nullptr; } UBool -UnhandledEngine::handles(UChar32 c, int32_t breakType) const { - return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled) - && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); +UnhandledEngine::handles(UChar32 c) const { + return fHandled && fHandled->contains(c); } int32_t UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, - int32_t breakType, UVector32 &/*foundBreaks*/ ) const { - if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { - UChar32 c = utext_current32(text); - while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { - utext_next32(text); // TODO: recast loop to work with post-increment operations. - c = utext_current32(text); - } + UChar32 c = utext_current32(text); + while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { + utext_next32(text); // TODO: recast loop to work with post-increment operations. + c = utext_current32(text); } return 0; } void -UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { - if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { - if (fHandled[breakType] == 0) { - fHandled[breakType] = new UnicodeSet(); - if (fHandled[breakType] == 0) { - return; - } - } - if (!fHandled[breakType]->contains(c)) { - UErrorCode status = U_ZERO_ERROR; - // Apply the entire script of the character. 
- int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); - fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); +UnhandledEngine::handleCharacter(UChar32 c) { + if (fHandled == nullptr) { + fHandled = new UnicodeSet(); + if (fHandled == nullptr) { + return; } } + if (!fHandled->contains(c)) { + UErrorCode status = U_ZERO_ERROR; + // Apply the entire script of the character. + int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); + fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status); + } } /* @@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER; const LanguageBreakEngine * -ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::getEngineFor(UChar32 c) { const LanguageBreakEngine *lbe = NULL; UErrorCode status = U_ZERO_ERROR; @@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { int32_t i = fEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != NULL && lbe->handles(c, breakType)) { + if (lbe != NULL && lbe->handles(c)) { return lbe; } } } // We didn't find an engine. Create one. 
- lbe = loadEngineFor(c, breakType); + lbe = loadEngineFor(c); if (lbe != NULL) { fEngines->push((void *)lbe, status); } @@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { } const LanguageBreakEngine * -ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::loadEngineFor(UChar32 c) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { - DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType); + DictionaryMatcher *m = loadDictionaryMatcherFor(code); if (m != NULL) { const LanguageBreakEngine *engine = NULL; switch(code) { @@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { } DictionaryMatcher * -ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) { +ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { UErrorCode status = U_ZERO_ERROR; // open root from brkitr tree. UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); diff --git a/deps/icu-small/source/common/brkeng.h b/deps/icu-small/source/common/brkeng.h index 5c61d2ed5d..e40fce13f6 100644 --- a/deps/icu-small/source/common/brkeng.h +++ b/deps/icu-small/source/common/brkeng.h @@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory { * a particular kind of break.</p> * * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c, int32_t breakType) const = 0; + virtual UBool handles(UChar32 c) const = 0; /** * <p>Find any breaks within a run in the supplied text.</p> @@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory { * is capable of handling. * @param startPos The start of the run within the supplied text. 
* @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks A Vector of int32_t to receive the breaks. * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const = 0; }; @@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0; + virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; }; @@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine { private: /** - * The sets of characters handled, for each break type + * The sets of characters handled. * @internal */ - UnicodeSet *fHandled[4]; + UnicodeSet *fHandled; public: @@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine { * a particular kind of break.</p> * * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c, int32_t breakType) const; + virtual UBool handles(UChar32 c) const; /** * <p>Find any breaks within a run in the supplied text.</p> @@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine { * is capable of handling. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks An allocated C array of the breaks found, if any * @return The number of breaks found. 
*/ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const; /** * <p>Tell the engine to handle a particular character and break type.</p> * * @param c A character which the engine should handle - * @param breakType The type of text break for which the engine should handle c */ - virtual void handleCharacter(UChar32 c, int32_t breakType); + virtual void handleCharacter(UChar32 c); }; @@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType); + virtual const LanguageBreakEngine *getEngineFor(UChar32 c); protected: /** @@ -263,21 +252,17 @@ protected: * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType); + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); /** * <p>Create a DictionaryMatcher for the specified script and break type.</p> * @param script An ISO 15924 script code that identifies the dictionary to be * created. - * @param breakType The kind of text break for which a dictionary is - * sought. * @return A DictionaryMatcher with the desired characteristics, or NULL. 
*/ - virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType); + virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp index a509ff10c9..23e0cc3c15 100644 --- a/deps/icu-small/source/common/brkiter.cpp +++ b/deps/icu-small/source/common/brkiter.cpp @@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN // ------------------------------------- BreakIterator* -BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) +BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status) { char fnbuff[256]; char ext[4]={'\0'}; @@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, U_LOCALE_BASED(locBased, *(BreakIterator*)result); locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale.data()); - result->setBreakType(kind); } ures_close(b); @@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) BreakIterator *result = NULL; switch (kind) { case UBRK_CHARACTER: - result = BreakIterator::buildInstance(loc, "grapheme", kind, status); + result = BreakIterator::buildInstance(loc, "grapheme", status); break; case UBRK_WORD: - result = BreakIterator::buildInstance(loc, "word", kind, status); + result = BreakIterator::buildInstance(loc, "word", status); break; case UBRK_LINE: uprv_strcpy(lbType, "line"); @@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) uprv_strcat(lbType, lbKeyValue); } } - result = BreakIterator::buildInstance(loc, lbType, kind, status); + result = BreakIterator::buildInstance(loc, lbType, status); break; case UBRK_SENTENCE: - result = BreakIterator::buildInstance(loc, "sentence", kind, status); + result = BreakIterator::buildInstance(loc, "sentence", status); #if 
!UCONFIG_NO_FILTERED_BREAK_ITERATION { char ssKeyValue[kKeyValueLenMax] = {0}; @@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) #endif break; case UBRK_TITLE: - result = BreakIterator::buildInstance(loc, "title", kind, status); + result = BreakIterator::buildInstance(loc, "title", status); break; default: status = U_ILLEGAL_ARGUMENT_ERROR; diff --git a/deps/icu-small/source/common/bytesinkutil.cpp b/deps/icu-small/source/common/bytesinkutil.cpp index bf1a2d45f8..6af7ddfd59 100644 --- a/deps/icu-small/source/common/bytesinkutil.cpp +++ b/deps/icu-small/source/common/bytesinkutil.cpp @@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { sink.Append(s8, 2); } -UBool -ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length, - ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if (length > 0) { - if (edits != nullptr) { - edits->addUnchanged(length); - } - if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { - sink.Append(reinterpret_cast<const char *>(s), length); - } +void +ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits) { + U_ASSERT(length > 0); + if (edits != nullptr) { + edits->addUnchanged(length); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(reinterpret_cast<const char *>(s), length); } - return TRUE; } UBool @@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return FALSE; } - return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode); + int32_t length = (int32_t)(limit - s); + if (length > 0) { + appendNonEmptyUnchanged(s, length, sink, options, edits); + } + return TRUE; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/bytesinkutil.h b/deps/icu-small/source/common/bytesinkutil.h index 004b49c4ce..8287ffea4c 
100644 --- a/deps/icu-small/source/common/bytesinkutil.h +++ b/deps/icu-small/source/common/bytesinkutil.h @@ -43,11 +43,19 @@ public: static UBool appendUnchanged(const uint8_t *s, int32_t length, ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode); + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); } + return TRUE; + } static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit, ByteSink &sink, uint32_t options, Edits *edits, UErrorCode &errorCode); + +private: + static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/cmemory.cpp b/deps/icu-small/source/common/cmemory.cpp index 300279c243..0b7e432c4d 100644 --- a/deps/icu-small/source/common/cmemory.cpp +++ b/deps/icu-small/source/common/cmemory.cpp @@ -41,30 +41,6 @@ static int n=0; static long b=0; #endif -#if U_DEBUG - -static char gValidMemorySink = 0; - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n) { - /* - * Access the memory to ensure that it's all valid. - * Load and save a computed value to try to ensure that the compiler - * does not throw away the whole loop. - * A thread analyzer might complain about un-mutexed access to gValidMemorySink - * which is true but harmless because no one ever uses the value in gValidMemorySink. 
- */ - const char *s = (const char *)p; - char c = gValidMemorySink; - size_t i; - U_ASSERT(p != NULL); - for(i = 0; i < n; ++i) { - c ^= s[i]; - } - gValidMemorySink = c; -} - -#endif /* U_DEBUG */ - U_CAPI void * U_EXPORT2 uprv_malloc(size_t s) { #if U_DEBUG && defined(UPRV_MALLOC_COUNT) diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index 83a0129651..a44f9a1902 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -36,31 +36,10 @@ #include <stdio.h> #endif -#if U_DEBUG - -/* - * The C++ standard requires that the source pointer for memcpy() & memmove() - * is valid, not NULL, and not at the end of an allocated memory block. - * In debug mode, we read one byte from the source point to verify that it's - * a valid, readable pointer. - */ - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n); - -#define uprv_memcpy(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)) -#define uprv_memmove(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)) - -#else #define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) #define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) -#endif /* U_DEBUG */ - /** * \def UPRV_LENGTHOF * Convenience macro to determine the length of a fixed array at compile-time. 
diff --git a/deps/icu-small/source/common/cstring.h b/deps/icu-small/source/common/cstring.h index 2232efcda5..ed0b1a7c8b 100644 --- a/deps/icu-small/source/common/cstring.h +++ b/deps/icu-small/source/common/cstring.h @@ -40,28 +40,10 @@ #define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c) #define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c) #define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c) - -#if U_DEBUG - -#define uprv_strncpy(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)) -#define uprv_strncmp(s1, s2, n) ( \ - uprv_checkValidMemory(s1, 1), \ - uprv_checkValidMemory(s2, 1), \ - U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)) -#define uprv_strncat(dst, src, n) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)) - -#else - #define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size) #define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n) #define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n) -#endif /* U_DEBUG */ - /** * Is c an ASCII-repertoire letter a-z or A-Z? 
* Note: The implementation is specific to whether ICU is compiled for diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 02fc8a4726..419d062ef2 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN ****************************************************************** */ -DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) { - fTypes = breakTypes; +DictionaryBreakEngine::DictionaryBreakEngine() { } DictionaryBreakEngine::~DictionaryBreakEngine() { } UBool -DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const { - return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes) - && fSet.contains(c)); +DictionaryBreakEngine::handles(UChar32 c) const { + return fSet.contains(c); } int32_t DictionaryBreakEngine::findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const { (void)startPos; // TODO: remove this param? 
int32_t result = 0; @@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text, } rangeStart = start; rangeEnd = current; - if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { - result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); - utext_setNativeIndex(text, current); - } + result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); + utext_setNativeIndex(text, current); return result; } @@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2; static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2; ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status); @@ -436,7 +431,7 @@ static const int32_t LAO_MIN_WORD = 2; static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2; LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status); @@ -632,7 +627,7 @@ static const int32_t BURMESE_MIN_WORD = 2; static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2; BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status); @@ -825,7 +820,7 @@ static const int32_t KHMER_MIN_WORD = 2; static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << 
UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); @@ -1047,7 +1042,7 @@ foundBest: */ static const uint32_t kuint32max = 0xFFFFFFFF; CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status) -: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) { +: DictionaryBreakEngine(), fDictionary(adoptDictionary) { // Korean dictionary only includes Hangul syllables fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status); fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status); @@ -1324,8 +1319,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, } if (katakanaRunLength < kMaxKatakanaGroupLength) { uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength); - if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) { - bestSnlp.setElementAt(newSnlp, j); + if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) { + bestSnlp.setElementAt(newSnlp, i+katakanaRunLength); prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i; } } diff --git a/deps/icu-small/source/common/dictbe.h b/deps/icu-small/source/common/dictbe.h index ffc1ae9f26..99d176cc2e 100644 --- a/deps/icu-small/source/common/dictbe.h +++ b/deps/icu-small/source/common/dictbe.h @@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine { UnicodeSet fSet; - /** - * The set of break types handled by this engine - * @internal - */ - - uint32_t fTypes; - - /** - * <p>Default constructor.</p> - * - */ - DictionaryBreakEngine(); - public: /** - * <p>Constructor setting the break types handled.</p> - * - * @param breakTypes A bitmap of types handled by the engine. 
+ * <p>Constructor </p> */ - DictionaryBreakEngine( uint32_t breakTypes ); + DictionaryBreakEngine(); /** * <p>Virtual destructor.</p> @@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * a particular kind of break.</p> * * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles( UChar32 c, int32_t breakType ) const; + virtual UBool handles(UChar32 c) const; /** * <p>Find any breaks within a run in the supplied text.</p> @@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * that starts from the first character in the range. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks vector of int32_t to receive the break positions * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const; protected: @@ -108,13 +90,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine { virtual void setCharacters( const UnicodeSet &set ); /** - * <p>Set the break types handled by this engine.</p> - * - * @param breakTypes A bitmap of types handled by the engine. 
- */ -// virtual void setBreakTypes( uint32_t breakTypes ); - - /** * <p>Divide up a range of known dictionary characters handled by this break engine.</p> * * @param text A UText representing the text diff --git a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp index 6a38b1bf3b..162b38de5d 100644 --- a/deps/icu-small/source/common/filteredbrk.cpp +++ b/deps/icu-small/source/common/filteredbrk.cpp @@ -694,6 +694,11 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st } FilteredBreakIteratorBuilder * +FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) { + return createEmptyInstance(status); +} + +FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) { if(U_FAILURE(status)) return NULL; LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status); diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index 54b289e24d..69f92d94c6 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -64,7 +64,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator) * Constructs a RuleBasedBreakIterator that uses the already-created * tables object that is passed in as a parameter. 
*/ -RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) { +RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) + : fSCharIter(UnicodeString()) +{ init(status); fData = new RBBIDataWrapper(data, status); // status checked in constructor if (U_FAILURE(status)) {return;} @@ -80,7 +82,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode // RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, - UErrorCode &status) { + UErrorCode &status) + : fSCharIter(UnicodeString()) +{ init(status); if (U_FAILURE(status)) { return; @@ -110,6 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, // //------------------------------------------------------------------------------- RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status) + : fSCharIter(UnicodeString()) { init(status); fData = new RBBIDataWrapper(udm, status); // status checked in constructor @@ -130,6 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, UParseError &parseError, UErrorCode &status) + : fSCharIter(UnicodeString()) { init(status); if (U_FAILURE(status)) {return;} @@ -152,7 +158,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, // Used when creating a RuleBasedBreakIterator from a set // of rules. 
//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator() { +RuleBasedBreakIterator::RuleBasedBreakIterator() + : fSCharIter(UnicodeString()) +{ UErrorCode status = U_ZERO_ERROR; init(status); } @@ -165,7 +173,8 @@ RuleBasedBreakIterator::RuleBasedBreakIterator() { // //------------------------------------------------------------------------------- RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other) -: BreakIterator(other) +: BreakIterator(other), + fSCharIter(UnicodeString()) { UErrorCode status = U_ZERO_ERROR; this->init(status); @@ -177,17 +186,13 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth * Destructor */ RuleBasedBreakIterator::~RuleBasedBreakIterator() { - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // fCharIter was adopted from the outside. delete fCharIter; } fCharIter = NULL; - delete fSCharIter; - fSCharIter = NULL; - delete fDCharIter; - fDCharIter = NULL; - utext_close(fText); + utext_close(&fText); if (fData != NULL) { fData->removeReference(); @@ -217,26 +222,29 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { } BreakIterator::operator=(that); - fBreakType = that.fBreakType; if (fLanguageBreakEngines != NULL) { delete fLanguageBreakEngines; fLanguageBreakEngines = NULL; // Just rebuild for now } // TODO: clone fLanguageBreakEngines from "that" UErrorCode status = U_ZERO_ERROR; - fText = utext_clone(fText, that.fText, FALSE, TRUE, &status); + utext_clone(&fText, &that.fText, FALSE, TRUE, &status); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { delete fCharIter; } - fCharIter = NULL; + fCharIter = &fSCharIter; - if (that.fCharIter != NULL ) { + if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) { // This is a little bit tricky - it will intially appear that // this->fCharIter is 
adopted, even if that->fCharIter was // not adopted. That's ok. fCharIter = that.fCharIter->clone(); } + fSCharIter = that.fSCharIter; + if (fCharIter == NULL) { + fCharIter = &fSCharIter; + } if (fData != NULL) { fData->removeReference(); @@ -269,33 +277,30 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { // //----------------------------------------------------------------------------- void RuleBasedBreakIterator::init(UErrorCode &status) { - fText = NULL; fCharIter = NULL; - fSCharIter = NULL; - fDCharIter = NULL; fData = NULL; fPosition = 0; fRuleStatusIndex = 0; fDone = false; fDictionaryCharCount = 0; - fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable - // dictionary behavior for Break Iterators that are - // built from rules. Even better would be the ability to - // declare the type in the rules. - fLanguageBreakEngines = NULL; fUnhandledBreakEngine = NULL; fBreakCache = NULL; fDictionaryCache = NULL; - if (U_FAILURE(status)) { + // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER. 
+ // fText = UTEXT_INITIALIZER; + static const UText initializedUText = UTEXT_INITIALIZER; + uprv_memcpy(&fText, &initializedUText, sizeof(UText)); + + if (U_FAILURE(status)) { return; } - fText = utext_openUChars(NULL, NULL, 0, &status); + utext_openUChars(&fText, NULL, 0, &status); fDictionaryCache = new DictionaryCache(this, status); fBreakCache = new BreakCache(this, status); - if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) { + if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) { status = U_MEMORY_ALLOCATION_ERROR; } @@ -344,7 +349,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const { const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that; - if (!utext_equals(fText, that2.fText)) { + if (!utext_equals(&fText, &that2.fText)) { // The two break iterators are operating on different text, // or have a different iteration position. // Note that fText's position is always the same as the break iterator's position. @@ -385,7 +390,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { } fBreakCache->reset(); fDictionaryCache->reset(); - fText = utext_clone(fText, ut, FALSE, TRUE, &status); + utext_clone(&fText, ut, FALSE, TRUE, &status); // Set up a dummy CharacterIterator to be returned if anyone // calls getText(). With input from UText, there is no reasonable @@ -393,27 +398,20 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { // Return one over an empty string instead - this is the closest // we can come to signaling a failure. 
// (GetText() is obsolete, this failure is sort of OK) - if (fDCharIter == NULL) { - static const UChar c = 0; - fDCharIter = new UCharCharacterIterator(&c, 0); - if (fDCharIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } + fSCharIter.setText(UnicodeString()); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // existing fCharIter was adopted from the outside. Delete it now. delete fCharIter; } - fCharIter = fDCharIter; + fCharIter = &fSCharIter; this->first(); } UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { - UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status); + UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status); return result; } @@ -439,7 +437,7 @@ void RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { // If we are holding a CharacterIterator adopted from a // previous call to this function, delete it now. - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { delete fCharIter; } @@ -450,9 +448,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { if (newText==NULL || newText->startIndex() != 0) { // startIndex !=0 wants to be an error, but there's no way to report it. // Make the iterator text be an empty string. - fText = utext_openUChars(fText, NULL, 0, &status); + utext_openUChars(&fText, NULL, 0, &status); } else { - fText = utext_openCharacterIterator(fText, newText, &status); + utext_openCharacterIterator(&fText, newText, &status); } this->first(); } @@ -467,23 +465,19 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) { UErrorCode status = U_ZERO_ERROR; fBreakCache->reset(); fDictionaryCache->reset(); - fText = utext_openConstUnicodeString(fText, &newText, &status); + utext_openConstUnicodeString(&fText, &newText, &status); // Set up a character iterator on the string. // Needed in case someone calls getText(). 
// Can not, unfortunately, do this lazily on the (probably never) // call to getText(), because getText is const. - if (fSCharIter == NULL) { - fSCharIter = new StringCharacterIterator(newText); - } else { - fSCharIter->setText(newText); - } + fSCharIter.setText(newText); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // old fCharIter was adopted from the outside. Delete it. delete fCharIter; } - fCharIter = fSCharIter; + fCharIter = &fSCharIter; this->first(); } @@ -503,14 +497,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U status = U_ILLEGAL_ARGUMENT_ERROR; return *this; } - int64_t pos = utext_getNativeIndex(fText); + int64_t pos = utext_getNativeIndex(&fText); // Shallow read-only clone of the new UText into the existing input UText - fText = utext_clone(fText, input, FALSE, TRUE, &status); + utext_clone(&fText, input, FALSE, TRUE, &status); if (U_FAILURE(status)) { return *this; } - utext_setNativeIndex(fText, pos); - if (utext_getNativeIndex(fText) != pos) { + utext_setNativeIndex(&fText, pos); + if (utext_getNativeIndex(&fText) != pos) { // Sanity check. The new input utext is supposed to have the exact same // contents as the old. If we can't set to the same position, it doesn't. // The contents underlying the old utext might be invalid at this point, @@ -540,7 +534,7 @@ int32_t RuleBasedBreakIterator::first(void) { * @return The text's past-the-end offset. */ int32_t RuleBasedBreakIterator::last(void) { - int32_t endPos = (int32_t)utext_nativeLength(fText); + int32_t endPos = (int32_t)utext_nativeLength(&fText); UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position. (void)endShouldBeBoundary; U_ASSERT(endShouldBeBoundary); @@ -611,8 +605,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) { // Move requested offset to a code point start. It might be on a trail surrogate, // or on a trail byte if the input is UTF-8. 
Or it may be beyond the end of the text. - utext_setNativeIndex(fText, startPos); - startPos = (int32_t)utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, startPos); + startPos = (int32_t)utext_getNativeIndex(&fText); UErrorCode status = U_ZERO_ERROR; fBreakCache->following(startPos, status); @@ -626,15 +620,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) { * @return The position of the last boundary before the starting position. */ int32_t RuleBasedBreakIterator::preceding(int32_t offset) { - if (fText == NULL || offset > utext_nativeLength(fText)) { + if (offset > utext_nativeLength(&fText)) { return last(); } // Move requested offset to a code point start. It might be on a trail surrogate, // or on a trail byte if the input is UTF-8. - utext_setNativeIndex(fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = utext_getNativeIndex(&fText); UErrorCode status = U_ZERO_ERROR; fBreakCache->preceding(adjustedOffset, status); @@ -660,8 +654,8 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { // Note that isBoundary() is always be false for offsets that are not on code point boundaries. // But we still need the side effect of leaving iteration at the following boundary. - utext_setNativeIndex(fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = utext_getNativeIndex(&fText); bool result = false; UErrorCode status = U_ZERO_ERROR; @@ -669,7 +663,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { result = (fBreakCache->current() == offset); } - if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) { + if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) { // Original offset is beyond the end of the text. 
Return FALSE, it's not a boundary, // but the iteration position remains set to the end of the text, which is a boundary. return FALSE; @@ -789,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext() { // if we're already at the end of the text, return DONE. initialPosition = fPosition; - UTEXT_SETNATIVEINDEX(fText, initialPosition); + UTEXT_SETNATIVEINDEX(&fText, initialPosition); result = initialPosition; - c = UTEXT_NEXT32(fText); + c = UTEXT_NEXT32(&fText); if (c==U_SENTINEL) { fDone = TRUE; return UBRK_DONE; @@ -854,7 +848,7 @@ int32_t RuleBasedBreakIterator::handleNext() { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { @@ -867,9 +861,7 @@ int32_t RuleBasedBreakIterator::handleNext() { // State Transition - move machine to its next state // - // Note: fNextState is defined as uint16_t[2], but we are casting - // a generated RBBI table to RBBIStateTableRow and some tables - // actually have more than 2 categories. + // fNextState is a variable-length array. U_ASSERT(category<fData->fHeader->fCatCount); state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) @@ -880,7 +872,7 @@ int32_t RuleBasedBreakIterator::handleNext() { if (row->fAccepting == -1) { // Match found, common case. if (mode != RBBI_START) { - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values. } @@ -898,7 +890,7 @@ int32_t RuleBasedBreakIterator::handleNext() { int16_t rule = row->fLookAhead; if (rule != 0) { // At the position of a '/' in a look-ahead match. Record it. 
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); lookAheadMatches.setPosition(rule, pos); } @@ -914,7 +906,7 @@ int32_t RuleBasedBreakIterator::handleNext() { // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { - c = UTEXT_NEXT32(fText); + c = UTEXT_NEXT32(&fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; @@ -928,9 +920,9 @@ int32_t RuleBasedBreakIterator::handleNext() { // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { - utext_setNativeIndex(fText, initialPosition); - utext_next32(fText); - result = (int32_t)utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, initialPosition); + utext_next32(&fText); + result = (int32_t)utext_getNativeIndex(&fText); fRuleStatusIndex = 0; } @@ -965,7 +957,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { int32_t initialPosition = 0; const RBBIStateTable *stateTable = fData->fSafeRevTable; - UTEXT_SETNATIVEINDEX(fText, fromPosition); + UTEXT_SETNATIVEINDEX(&fText, fromPosition); #ifdef RBBI_DEBUG if (gTrace) { RBBIDebugPuts("Handle Previous pos char state category"); @@ -973,14 +965,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { #endif // if we're already at the start of the text, return DONE. - if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) { + if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) { return BreakIterator::DONE; } // Set up the starting char. 
- initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); + initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText); result = initialPosition; - c = UTEXT_PREVIOUS32(fText); + c = UTEXT_PREVIOUS32(&fText); // Set the initial state for the state machine state = START_STATE; @@ -1028,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { @@ -1041,9 +1033,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // State Transition - move machine to its next state // - // Note: fNextState is defined as uint16_t[2], but we are casting - // a generated RBBI table to RBBIStateTableRow and some tables - // actually have more than 2 categories. + // fNextState is a variable-length array. U_ASSERT(category<fData->fHeader->fCatCount); state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) @@ -1051,7 +1041,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { if (row->fAccepting == -1) { // Match found, common case. - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } int16_t completedRule = row->fAccepting; @@ -1059,14 +1049,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // Lookahead match is completed. int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule); if (lookaheadResult >= 0) { - UTEXT_SETNATIVEINDEX(fText, lookaheadResult); + UTEXT_SETNATIVEINDEX(&fText, lookaheadResult); return lookaheadResult; } } int16_t rule = row->fLookAhead; if (rule != 0) { // At the position of a '/' in a look-ahead match. Record it. 
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); lookAheadMatches.setPosition(rule, pos); } @@ -1082,7 +1072,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { - c = UTEXT_PREVIOUS32(fText); + c = UTEXT_PREVIOUS32(&fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; @@ -1096,9 +1086,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { - UTEXT_SETNATIVEINDEX(fText, initialPosition); - UTEXT_PREVIOUS32(fText); - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + UTEXT_SETNATIVEINDEX(&fText, initialPosition); + UTEXT_PREVIOUS32(&fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } #ifdef RBBI_DEBUG @@ -1247,7 +1237,7 @@ static void U_CALLCONV initLanguageFactories() { static const LanguageBreakEngine* -getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) +getLanguageBreakEngineFromFactory(UChar32 c) { umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories); if (gLanguageBreakFactories == NULL) { @@ -1258,7 +1248,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) const LanguageBreakEngine *lbe = NULL; while (--i >= 0) { LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); - lbe = factory->getEngineFor(c, breakType); + lbe = factory->getEngineFor(c); if (lbe != NULL) { break; } @@ -1290,14 +1280,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { int32_t i = fLanguageBreakEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); - if (lbe->handles(c, fBreakType)) { + if (lbe->handles(c)) { return lbe; } } // No existing dictionary took the 
character. See if a factory wants to // give us a new LanguageBreakEngine for this character. - lbe = getLanguageBreakEngineFromFactory(c, fBreakType); + lbe = getLanguageBreakEngineFromFactory(c); // If we got one, use it and push it on our stack. if (lbe != NULL) { @@ -1313,6 +1303,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { fUnhandledBreakEngine = new UnhandledEngine(status); if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) { status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } // Put it last so that scripts for which we have an engine get tried // first. @@ -1327,25 +1318,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { // Tell the reject engine about the character; at its discretion, it may // add more than just the one character. - fUnhandledBreakEngine->handleCharacter(c, fBreakType); + fUnhandledBreakEngine->handleCharacter(c); return fUnhandledBreakEngine; } - - -/*int32_t RuleBasedBreakIterator::getBreakType() const { - return fBreakType; -}*/ - -void RuleBasedBreakIterator::setBreakType(int32_t type) { - fBreakType = type; -} - void RuleBasedBreakIterator::dumpCache() { fBreakCache->dumpCache(); } +void RuleBasedBreakIterator::dumpTables() { + fData->printData(); +} + /** * Returns the description used to create this iterator */ diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 9d716bb342..ba9329d477 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -26,14 +26,11 @@ U_NAMESPACE_BEGIN */ RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) : - fBI(bi), fBreaks(NULL), fPositionInCache(-1), + fBI(bi), fBreaks(status), fPositionInCache(-1), fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) { - fBreaks = new UVector32(status); } RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() { - delete fBreaks; - fBreaks = 
NULL; } void RuleBasedBreakIterator::DictionaryCache::reset() { @@ -42,7 +39,7 @@ void RuleBasedBreakIterator::DictionaryCache::reset() { fLimit = 0; fFirstRuleStatusIndex = 0; fOtherRuleStatusIndex = 0; - fBreaks->removeAllElements(); + fBreaks.removeAllElements(); } UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) { @@ -54,13 +51,13 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_ // Sequential iteration, move from previous boundary to the following int32_t r = 0; - if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) { + if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { ++fPositionInCache; - if (fPositionInCache >= fBreaks->size()) { + if (fPositionInCache >= fBreaks.size()) { fPositionInCache = -1; return FALSE; } - r = fBreaks->elementAti(fPositionInCache); + r = fBreaks.elementAti(fPositionInCache); U_ASSERT(r > fromPos); *result = r; *statusIndex = fOtherRuleStatusIndex; @@ -69,8 +66,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_ // Random indexing. Linear search for the boundary following the given position. 
- for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) { - r= fBreaks->elementAti(fPositionInCache); + for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) { + r= fBreaks.elementAti(fPositionInCache); if (r > fromPos) { *result = r; *statusIndex = fOtherRuleStatusIndex; @@ -90,16 +87,16 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_ } if (fromPos == fLimit) { - fPositionInCache = fBreaks->size() - 1; + fPositionInCache = fBreaks.size() - 1; if (fPositionInCache >= 0) { - U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos); + U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos); } } int32_t r; - if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) { + if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { --fPositionInCache; - r = fBreaks->elementAti(fPositionInCache); + r = fBreaks.elementAti(fPositionInCache); U_ASSERT(r < fromPos); *result = r; *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; @@ -111,8 +108,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_ return FALSE; } - for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) { - r = fBreaks->elementAti(fPositionInCache); + for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { + r = fBreaks.elementAti(fPositionInCache); if (r < fromPos) { *result = r; *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; @@ -141,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo int32_t current; UErrorCode status = U_ZERO_ERROR; int32_t foundBreakCount = 0; - UText *text = fBI->fText; + UText *text = &fBI->fText; // Loop through the text, looking for ranges of dictionary characters. 
// For each span, find the appropriate break engine, and ask it to find @@ -168,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // Ask the language object if there are any breaks. It will add them to the cache and // leave the text pointer on the other side of its range, ready to search for the next one. if (lbe != NULL) { - foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks); + foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks); } // Reload the loop variables for the next go-round @@ -182,21 +179,21 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // printf("foundBreakCount = %d\n", foundBreakCount); if (foundBreakCount > 0) { - U_ASSERT(foundBreakCount == fBreaks->size()); - if (startPos < fBreaks->elementAti(0)) { + U_ASSERT(foundBreakCount == fBreaks.size()); + if (startPos < fBreaks.elementAti(0)) { // The dictionary did not place a boundary at the start of the segment of text. // Add one now. This should not commonly happen, but it would be easy for interactions // of the rules for dictionary segments and the break engine implementations to // inadvertently cause it. Cover it here, just in case. - fBreaks->insertElementAt(startPos, 0, status); + fBreaks.insertElementAt(startPos, 0, status); } - if (endPos > fBreaks->peeki()) { - fBreaks->push(endPos, status); + if (endPos > fBreaks.peeki()) { + fBreaks.push(endPos, status); } fPositionInCache = 0; // Note: Dictionary matching may extend beyond the original limit. - fStart = fBreaks->elementAti(0); - fLimit = fBreaks->peeki(); + fStart = fBreaks.elementAti(0); + fLimit = fBreaks.peeki(); } else { // there were no language-based breaks, even though the segment contained // dictionary characters. 
Subsequent attempts to fetch boundaries from the dictionary cache diff --git a/deps/icu-small/source/common/rbbi_cache.h b/deps/icu-small/source/common/rbbi_cache.h index 8dc7320db9..fd6deb4333 100644 --- a/deps/icu-small/source/common/rbbi_cache.h +++ b/deps/icu-small/source/common/rbbi_cache.h @@ -56,7 +56,7 @@ class RuleBasedBreakIterator::DictionaryCache: public UMemory { RuleBasedBreakIterator *fBI; - UVector32 *fBreaks; // A vector containing the boundaries. + UVector32 fBreaks; // A vector containing the boundaries. int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() // or preceding(). Optimizes sequential access. int32_t fStart; // Text position of first boundary in cache. diff --git a/deps/icu-small/source/common/rbbidata.cpp b/deps/icu-small/source/common/rbbidata.cpp index d66eca82f8..18912a6a7b 100644 --- a/deps/icu-small/source/common/rbbidata.cpp +++ b/deps/icu-small/source/common/rbbidata.cpp @@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab #endif -#ifdef RBBI_DEBUG void RBBIDataWrapper::printData() { +#ifdef RBBI_DEBUG RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1], fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]); @@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() { RBBIDebugPrintf("%c", fRuleSource[c]); } RBBIDebugPrintf("\n\n"); -} #endif +} U_NAMESPACE_END diff --git a/deps/icu-small/source/common/rbbidata.h b/deps/icu-small/source/common/rbbidata.h index 75427863d9..8b21acca30 100644 --- a/deps/icu-small/source/common/rbbidata.h +++ b/deps/icu-small/source/common/rbbidata.h @@ -116,9 +116,10 @@ struct RBBIStateTableRow { /* StatusTable of the set of matching */ /* tags (rule status values) */ int16_t fReserved; - uint16_t fNextState[2]; /* Next State, indexed by char category. 
*/ - /* This array does not have two elements */ - /* Array Size is actually fData->fHeader->fCatCount */ + uint16_t fNextState[1]; /* Next State, indexed by char category. */ + /* Variable-length array declared with length 1 */ + /* to disable bounds checkers. */ + /* Array Size is actually fData->fHeader->fCatCount*/ /* CAUTION: see RBBITableBuilder::getTableSize() */ /* before changing anything here. */ }; @@ -129,7 +130,9 @@ struct RBBIStateTable { uint32_t fRowLen; /* Length of a state table row, in bytes. */ uint32_t fFlags; /* Option Flags for this state table */ uint32_t fReserved; /* reserved */ - char fTableData[4]; /* First RBBIStateTableRow begins here. */ + char fTableData[1]; /* First RBBIStateTableRow begins here. */ + /* Variable-length array declared with length 1 */ + /* to disable bounds checkers. */ /* (making it char[] simplifies ugly address */ /* arithmetic for indexing variable length rows.) */ }; @@ -162,13 +165,8 @@ public: UBool operator ==(const RBBIDataWrapper &other) const; int32_t hashCode(); const UnicodeString &getRuleSourceString() const; -#ifdef RBBI_DEBUG void printData(); void printTable(const char *heading, const RBBIStateTable *table); -#else - #define printData() - #define printTable(heading, table) -#endif /* */ /* Pointers to items within the data */ diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp index c67f6f8166..9fc8f8e814 100644 --- a/deps/icu-small/source/common/rbbirb.cpp +++ b/deps/icu-small/source/common/rbbirb.cpp @@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, UParseError *parseErr, UErrorCode &status) - : fRules(rules) + : fRules(rules), fStrippedRules(rules) { fStatus = &status; // status is checked below fParseError = parseErr; @@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { return NULL; } - // Remove comments and whitespace from the rules to make it smaller. 
- UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules)); + // Remove whitespace from the rules to make it smaller. + // The rule parser has already removed comments. + fStrippedRules = fScanner->stripRules(fStrippedRules); // Calculate the size of each section in the data. // Sizes here are padded up to a multiple of 8 for better memory alignment. @@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize()); int32_t trieSize = align8(fSetBuilder->getTrieSize()); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); - int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar)); + int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar)); (void)safeFwdTableSize; @@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { data->fStatusTable = data->fTrie + trieSize; data->fStatusTableLen= statusTableSize; data->fRuleSource = data->fStatusTable + statusTableSize; - data->fRuleSourceLen = strippedRules.length() * sizeof(UChar); + data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar); uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); @@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { ruleStatusTable[i] = fRuleStatusVals->elementAti(i); } - strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); + fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); return data; } @@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, // // UnicodeSet processing. // Munge the Unicode Sets to create a set of character categories. - // Generate the mapping tables (TRIE) from input 32-bit characters to + // Generate the mapping tables (TRIE) from input code points to // the character categories. 
// - builder.fSetBuilder->build(); + builder.fSetBuilder->buildRanges(); // @@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, } #endif + builder.optimizeTables(); + builder.fSetBuilder->buildTrie(); + + + // // Package up the compiled data into a memory image // in the run-time format. @@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, return This; } +void RBBIRuleBuilder::optimizeTables() { + int32_t leftClass; + int32_t rightClass; + + leftClass = 3; + rightClass = 0; + while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) { + fSetBuilder->mergeCategories(leftClass, rightClass); + fForwardTables->removeColumn(rightClass); + fReverseTables->removeColumn(rightClass); + fSafeFwdTables->removeColumn(rightClass); + fSafeRevTables->removeColumn(rightClass); + } + + fForwardTables->removeDuplicateStates(); + fReverseTables->removeDuplicateStates(); + fSafeFwdTables->removeDuplicateStates(); + fSafeRevTables->removeDuplicateStates(); + + + +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/icu-small/source/common/rbbirb.h b/deps/icu-small/source/common/rbbirb.h index 6fbdbff744..511f394b45 100644 --- a/deps/icu-small/source/common/rbbirb.h +++ b/deps/icu-small/source/common/rbbirb.h @@ -126,10 +126,19 @@ public: ); virtual ~RBBIRuleBuilder(); + + /** + * Fold together redundant character classes (table columns) and + * redundant states (table rows). Done after initial table generation, + * before serializing the result. + */ + void optimizeTables(); + char *fDebugEnv; // controls debug trace output UErrorCode *fStatus; // Error reporting. Keeping status UParseError *fParseError; // here avoids passing it everywhere. const UnicodeString &fRules; // The rule string that we are compiling + UnicodeString fStrippedRules; // The rule string, with comments stripped. RBBIRuleScanner *fScanner; // The scanner. 
RBBINode *fForwardTree; // The parse trees, generated by the scanner, diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp index 1653a0c7bc..e3472ed599 100644 --- a/deps/icu-small/source/common/rbbiscan.cpp +++ b/deps/icu-small/source/common/rbbiscan.cpp @@ -822,27 +822,24 @@ static const UChar chRParen = 0x29; //------------------------------------------------------------------------------ // -// stripRules Return a rules string without unnecessary -// characters. +// stripRules Return a rules string without extra spaces. +// (Comments are removed separately, during rule parsing.) // //------------------------------------------------------------------------------ UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { UnicodeString strippedRules; - int rulesLength = rules.length(); - for (int idx = 0; idx < rulesLength; ) { - UChar ch = rules[idx++]; - if (ch == chPound) { - while (idx < rulesLength - && ch != chCR && ch != chLF && ch != chNEL) - { - ch = rules[idx++]; - } - } - if (!u_isISOControl(ch)) { - strippedRules.append(ch); + int32_t rulesLength = rules.length(); + bool skippingSpaces = false; + + for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) { + UChar32 cp = rules.char32At(idx); + bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE); + if (skippingSpaces && whiteSpace) { + continue; } + strippedRules.append(cp); + skippingSpaces = whiteSpace; } - // strippedRules = strippedRules.unescape(); return strippedRules; } @@ -942,6 +939,7 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { // It will be treated as white-space, and serves to break up anything // that might otherwise incorrectly clump together with a comment in // the middle (a variable name, for example.) 
+ int32_t commentStart = fScanIndex; for (;;) { c.fChar = nextCharLL(); if (c.fChar == (UChar32)-1 || // EOF @@ -950,6 +948,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { c.fChar == chNEL || c.fChar == chLS) {break;} } + for (int32_t i=commentStart; i<fNextIndex-1; ++i) { + fRB->fStrippedRules.setCharAt(i, u' '); + } } if (c.fChar == (UChar32)-1) { return; diff --git a/deps/icu-small/source/common/rbbisetb.cpp b/deps/icu-small/source/common/rbbisetb.cpp index c172da00df..4e7389b4af 100644 --- a/deps/icu-small/source/common/rbbisetb.cpp +++ b/deps/icu-small/source/common/rbbisetb.cpp @@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder() // from the Unicode Sets. // //------------------------------------------------------------------------ -void RBBISetBuilder::build() { +void RBBISetBuilder::buildRanges() { RBBINode *usetNode; RangeDescriptor *rlRange; @@ -245,11 +245,16 @@ void RBBISetBuilder::build() { if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} +} + + +// +// Build the Trie table for mapping UChar32 values to the corresponding +// range group number. +// +void RBBISetBuilder::buildTrie() { + RangeDescriptor *rlRange; - // - // Build the Trie table for mapping UChar32 values to the corresponding - // range group number - // fTrie = utrie2_open(0, // Initial value for all code points. 0, // Error value for out-of-range input. 
fStatus); @@ -265,6 +270,22 @@ void RBBISetBuilder::build() { } +void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) { + U_ASSERT(left >= 1); + U_ASSERT(right > left); + for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { + int32_t rangeNum = rd->fNum & ~DICT_BIT; + int32_t rangeDict = rd->fNum & DICT_BIT; + if (rangeNum == right) { + rd->fNum = left | rangeDict; + } else if (rangeNum > right) { + rd->fNum--; + } + } + --fGroupCount; +} + + //----------------------------------------------------------------------------------- // // getTrieSize() Return the size that will be required to serialize the Trie. @@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() { lastPrintedGroupNum = groupNum; RBBIDebugPrintf("%2i ", groupNum); - if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");} + if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");} for (i=0; i<rlRange->fIncludesSets->size(); i++) { RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); @@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) { void RangeDescriptor::setDictionaryFlag() { int i; - for (i=0; i<this->fIncludesSets->size(); i++) { - RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); - UnicodeString setName; - RBBINode *setRef = usetNode->fParent; - if (setRef != NULL) { + static const char16_t *dictionary = u"dictionary"; + for (i=0; i<fIncludesSets->size(); i++) { + RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); + RBBINode *setRef = usetNode->fParent; + if (setRef != nullptr) { RBBINode *varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; + if (varRef && varRef->fType == RBBINode::varRef) { + const UnicodeString *setName = &varRef->fText; + if (setName->compare(dictionary, -1) == 0) { + fNum |= RBBISetBuilder::DICT_BIT; + break; + } } } - if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no 
string literals. - this->fNum |= 0x4000; - break; - } } } diff --git a/deps/icu-small/source/common/rbbisetb.h b/deps/icu-small/source/common/rbbisetb.h index 7cedb45b33..a7a91b3b37 100644 --- a/deps/icu-small/source/common/rbbisetb.h +++ b/deps/icu-small/source/common/rbbisetb.h @@ -82,7 +82,8 @@ public: RBBISetBuilder(RBBIRuleBuilder *rb); ~RBBISetBuilder(); - void build(); + void buildRanges(); + void buildTrie(); void addValToSets(UVector *sets, uint32_t val); void addValToSet (RBBINode *usetNode, uint32_t val); int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the @@ -93,6 +94,13 @@ public: UChar32 getFirstChar(int32_t val) const; UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo // character were encountered. + /** merge two character categories that have been identified as having equivalent behavior. + * The ranges belonging to the right category (table column) will be added to the left. + */ + void mergeCategories(int32_t left, int32_t right); + + static constexpr int32_t DICT_BIT = 0x4000; + #ifdef RBBI_DEBUG void printSets(); void printRanges(); diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index b3e6ca51d1..61661a5442 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -22,6 +22,7 @@ #include "rbbidata.h" #include "cstring.h" #include "uassert.h" +#include "uvectr32.h" #include "cmemory.h" U_NAMESPACE_BEGIN @@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() { // if sd->fAccepting already had a value other than 0 or -1, leave it be. // If the end marker node is from a look-ahead rule, set - // the fLookAhead field or this state also. + // the fLookAhead field for this state also. if (endMarker->fLookAheadEnd) { // TODO: don't change value if already set? // TODO: allow for more than one active look-ahead rule in engine. 
@@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) { } #endif +// +// findDuplCharClassFrom() +// +bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + uint16_t table_base; + uint16_t table_dupl; + for (; baseCategory < numCols-1; ++baseCategory) { + for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) { + for (int32_t state=0; state<numStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + table_base = (uint16_t)sd->fDtran->elementAti(baseCategory); + table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory); + if (table_base != table_dupl) { + break; + } + } + if (table_base == table_dupl) { + return true; + } + } + } + return false; +} + + +// +// removeColumn() +// +void RBBITableBuilder::removeColumn(int32_t column) { + int32_t numStates = fDStates->size(); + for (int32_t state=0; state<numStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + U_ASSERT(column < sd->fDtran->size()); + sd->fDtran->removeElementAt(column); + } +} + +/* + * findDuplicateState + */ +bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + for (; firstState<numStates-1; ++firstState) { + RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState); + for (duplState=firstState+1; duplState<numStates; ++duplState) { + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + if (firstSD->fAccepting != duplSD->fAccepting || + firstSD->fLookAhead != duplSD->fLookAhead || + firstSD->fTagsIdx != duplSD->fTagsIdx) { + continue; + } + bool rowsMatch = true; + for (int32_t col=0; col < numCols; ++col) { + int32_t firstVal = 
firstSD->fDtran->elementAti(col); + int32_t duplVal = duplSD->fDtran->elementAti(col); + if (!((firstVal == duplVal) || + ((firstVal == firstState || firstVal == duplState) && + (duplVal == firstState || duplVal == duplState)))) { + rowsMatch = false; + break; + } + } + if (rowsMatch) { + return true; + } + } + } + return false; +} + +void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) { + U_ASSERT(keepState < duplState); + U_ASSERT(duplState < fDStates->size()); + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + fDStates->removeElementAt(duplState); + delete duplSD; + + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + for (int32_t state=0; state<numStates; ++state) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + for (int32_t col=0; col<numCols; col++) { + int32_t existingVal = sd->fDtran->elementAti(col); + int32_t newVal = existingVal; + if (existingVal == duplState) { + newVal = keepState; + } else if (existingVal > duplState) { + newVal = existingVal - 1; + } + sd->fDtran->setElementAt(newVal, col); + } + if (sd->fAccepting == duplState) { + sd->fAccepting = keepState; + } else if (sd->fAccepting > duplState) { + sd->fAccepting--; + } + if (sd->fLookAhead == duplState) { + sd->fLookAhead = keepState; + } else if (sd->fLookAhead > duplState) { + sd->fLookAhead--; + } + } +} + + +/* + * RemoveDuplicateStates + */ +void RBBITableBuilder::removeDuplicateStates() { + int32_t firstState = 3; + int32_t duplicateState = 0; + while (findDuplicateState(firstState, duplicateState)) { + // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState); + removeState(firstState, duplicateState); + } +} //----------------------------------------------------------------------------- // @@ -1095,21 +1217,17 @@ int32_t RBBITableBuilder::getTableSize() const { return 0; } - size = sizeof(RBBIStateTable) - 4; // The header, 
with no rows to the table. + size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. numRows = fDStates->size(); numCols = fRB->fSetBuilder->getNumCharCategories(); - // Note The declaration of RBBIStateTableRow is for a table of two columns. - // Therefore we subtract two from numCols when determining - // how much storage to add to a row for the total columns. - rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); + rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols; size += numRows * rowSize; return size; } - //----------------------------------------------------------------------------- // // exportTable() export the state transition table in the format required @@ -1126,14 +1244,14 @@ void RBBITableBuilder::exportTable(void *where) { return; } - if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff || + int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); + if (catCount > 0x7fff || fDStates->size() > 0x7fff) { *fStatus = U_BRK_INTERNAL_ERROR; return; } - table->fRowLen = sizeof(RBBIStateTableRow) + - sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2); + table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount; table->fNumStates = fDStates->size(); table->fFlags = 0; if (fRB->fLookAheadHardBreak) { @@ -1152,7 +1270,7 @@ void RBBITableBuilder::exportTable(void *where) { row->fAccepting = (int16_t)sd->fAccepting; row->fLookAhead = (int16_t)sd->fLookAhead; row->fTagIdx = (int16_t)sd->fTagsIdx; - for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) { + for (col=0; col<catCount; col++) { row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col); } } @@ -1259,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu fPositions = NULL; fDtran = NULL; - fDtran = new UVector(lastInputSymbol+1, *fStatus); + fDtran = new UVector32(lastInputSymbol+1, *fStatus); if (U_FAILURE(*fStatus)) { return; } @@ 
-1267,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu *fStatus = U_MEMORY_ALLOCATION_ERROR; return; } - fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized. + fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized. // It is indexed by input symbols, and will // hold the next state number for each // symbol. diff --git a/deps/icu-small/source/common/rbbitblb.h b/deps/icu-small/source/common/rbbitblb.h index 1041501878..09b57b5cf0 100644 --- a/deps/icu-small/source/common/rbbitblb.h +++ b/deps/icu-small/source/common/rbbitblb.h @@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN class RBBIRuleScanner; class RBBIRuleBuilder; +class UVector32; // // class RBBITableBuilder is part of the RBBI rule compiler. @@ -42,9 +43,24 @@ public: void build(); int32_t getTableSize() const; // Return the runtime size in bytes of // the built state table - void exportTable(void *where); // fill in the runtime state table. - // Sufficient memory must exist at - // the specified location. + + /** Fill in the runtime state table. Sufficient memory must exist at the specified location. + */ + void exportTable(void *where); + + /** Find duplicate (redundant) character classes, beginning after the specifed + * pair, within this state table. This is an iterator-like function, used to + * identify char classes (state table columns) that can be eliminated. + */ + bool findDuplCharClassFrom(int &baseClass, int &duplClass); + + /** Remove a column from the state table. Used when two character categories + * have been found equivalent, and merged together, to eliminate the uneeded table column. + */ + void removeColumn(int32_t column); + + /** Check for, and remove dupicate states (table rows). */ + void removeDuplicateStates(); private: @@ -60,8 +76,29 @@ private: void flagTaggedStates(); void mergeRuleStatusVals(); + /** + * Merge redundant state table columns, eliminating character classes with identical behavior. 
+ * Done after the state tables are generated, just before converting to their run-time format. + */ + int32_t mergeColumns(); + void addRuleRootNodes(UVector *dest, RBBINode *node); + /** Find the next duplicate state. An iterator function. + * @param firstState (in/out) begin looking at this state, return the first of the + * pair of duplicates. + * @param duplicateState returns the duplicate state of fistState + * @return true if a duplicate pair of states was found. + */ + bool findDuplicateState(int32_t &firstState, int32_t &duplicateState); + + /** Remove a duplicate state/ + * @param keepState First of the duplicate pair. Keep it. + * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state + * to refer to keepState instead. + */ + void removeState(int32_t keepState, int32_t duplState); + // Set functions for UVector. // TODO: make a USet subclass of UVector @@ -112,7 +149,7 @@ public: // with this state. Unordered (it's a set). // UVector contents are RBBINode * - UVector *fDtran; // Transitions out of this state. + UVector32 *fDtran; // Transitions out of this state. // indexed by input character // contents is int index of dest state // in RBBITableBuilder.fDStates diff --git a/deps/icu-small/source/common/sharedobject.cpp b/deps/icu-small/source/common/sharedobject.cpp index 37aa458e00..6eeca8605f 100644 --- a/deps/icu-small/source/common/sharedobject.cpp +++ b/deps/icu-small/source/common/sharedobject.cpp @@ -8,7 +8,10 @@ * sharedobject.cpp */ #include "sharedobject.h" +#include "mutex.h" #include "uassert.h" +#include "umutex.h" +#include "unifiedcache.h" U_NAMESPACE_BEGIN @@ -17,69 +20,41 @@ SharedObject::~SharedObject() {} UnifiedCacheBase::~UnifiedCacheBase() {} void -SharedObject::addRef(UBool fromWithinCache) const { - umtx_atomic_inc(&totalRefCount); - - // Although items in use may not be correct immediately, it - // will be correct eventually. 
- if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) { - // If this object is cached, and the hardRefCount goes from 0 to 1, - // then the increment must happen from within the cache while the - // cache global mutex is locked. In this way, we can be rest assured - // that data races can't happen if the cache performs some task if - // the hardRefCount is zero while the global cache mutex is locked. - (void)fromWithinCache; // Suppress unused variable warning in non-debug builds. - U_ASSERT(fromWithinCache); - cachePtr->incrementItemsInUse(); - } +SharedObject::addRef() const { + umtx_atomic_inc(&hardRefCount); } +// removeRef Decrement the reference count and delete if it is zero. +// Note that SharedObjects with a non-null cachePtr are owned by the +// unified cache, and the cache will be responsible for the actual deletion. +// The deletion could be as soon as immediately following the +// update to the reference count, if another thread is running +// a cache eviction cycle concurrently. +// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects. +// THE OBJECT MAY ALREADY BE GONE. void -SharedObject::removeRef(UBool fromWithinCache) const { - UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0); - UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0); - - // Although items in use may not be correct immediately, it - // will be correct eventually. 
- if (decrementItemsInUse && cachePtr != NULL) { - if (fromWithinCache) { - cachePtr->decrementItemsInUse(); +SharedObject::removeRef() const { + const UnifiedCacheBase *cache = this->cachePtr; + int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount); + U_ASSERT(updatedRefCount >= 0); + if (updatedRefCount == 0) { + if (cache) { + cache->handleUnreferencedObject(); } else { - cachePtr->decrementItemsInUseWithLockingAndEviction(); + delete this; } } - if (allReferencesGone) { - delete this; - } } -void -SharedObject::addSoftRef() const { - umtx_atomic_inc(&totalRefCount); - ++softRefCount; -} - -void -SharedObject::removeSoftRef() const { - --softRefCount; - if (umtx_atomic_dec(&totalRefCount) == 0) { - delete this; - } -} int32_t SharedObject::getRefCount() const { - return umtx_loadAcquire(totalRefCount); -} - -int32_t -SharedObject::getHardRefCount() const { return umtx_loadAcquire(hardRefCount); } void SharedObject::deleteIfZeroRefCount() const { - if(getRefCount() == 0) { + if (this->cachePtr == nullptr && getRefCount() == 0) { delete this; } } diff --git a/deps/icu-small/source/common/sharedobject.h b/deps/icu-small/source/common/sharedobject.h index 783b55948a..54655d0d71 100644 --- a/deps/icu-small/source/common/sharedobject.h +++ b/deps/icu-small/source/common/sharedobject.h @@ -17,6 +17,8 @@ U_NAMESPACE_BEGIN +class SharedObject; + /** * Base class for unified cache exposing enough methods to SharedObject * instances to allow their addRef() and removeRef() methods to @@ -28,22 +30,12 @@ public: UnifiedCacheBase() { } /** - * Called by addRefWhileHoldingCacheLock() when the hard reference count - * of its instance goes from 0 to 1. + * Notify the cache implementation that an object was seen transitioning to + * zero hard references. The cache may use this to keep track the number of + * unreferenced SharedObjects, and to trigger evictions. 
*/ - virtual void incrementItemsInUse() const = 0; + virtual void handleUnreferencedObject() const = 0; - /** - * Called by removeRef() when the hard reference count of its instance - * drops from 1 to 0. - */ - virtual void decrementItemsInUseWithLockingAndEviction() const = 0; - - /** - * Called by removeRefWhileHoldingCacheLock() when the hard reference - * count of its instance drops from 1 to 0. - */ - virtual void decrementItemsInUse() const = 0; virtual ~UnifiedCacheBase(); private: UnifiedCacheBase(const UnifiedCacheBase &); @@ -63,7 +55,6 @@ class U_COMMON_API SharedObject : public UObject { public: /** Initializes totalRefCount, softRefCount to 0. */ SharedObject() : - totalRefCount(0), softRefCount(0), hardRefCount(0), cachePtr(NULL) {} @@ -71,7 +62,6 @@ public: /** Initializes totalRefCount, softRefCount to 0. */ SharedObject(const SharedObject &other) : UObject(other), - totalRefCount(0), softRefCount(0), hardRefCount(0), cachePtr(NULL) {} @@ -79,93 +69,45 @@ public: virtual ~SharedObject(); /** - * Increments the number of references to this object. Thread-safe. + * Increments the number of hard references to this object. Thread-safe. + * Not for use from within the Unified Cache implementation. */ - void addRef() const { addRef(FALSE); } + void addRef() const; /** - * Increments the number of references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void addRefWhileHoldingCacheLock() const { addRef(TRUE); } - - /** - * Increments the number of soft references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void addSoftRef() const; - - /** - * Decrements the number of references to this object. Thread-safe. - */ - void removeRef() const { removeRef(FALSE); } - - /** - * Decrements the number of references to this object. 
- * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); } - - /** - * Decrements the number of soft references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. + * Decrements the number of hard references to this object, and + * arrange for possible cache-eviction and/or deletion if ref + * count goes to zero. Thread-safe. + * + * Not for use from within the UnifiedCache implementation. */ - void removeSoftRef() const; + void removeRef() const; /** - * Returns the reference counter including soft references. + * Returns the number of hard references for this object. * Uses a memory barrier. */ int32_t getRefCount() const; /** - * Returns the count of soft references only. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - int32_t getSoftRefCount() const { return softRefCount; } - - /** - * Returns the count of hard references only. Uses a memory barrier. - * Used for testing the cache. Regular clients won't need this. - */ - int32_t getHardRefCount() const; - - /** * If noHardReferences() == TRUE then this object has no hard references. * Must be called only from within the internals of UnifiedCache. */ - inline UBool noHardReferences() const { return getHardRefCount() == 0; } + inline UBool noHardReferences() const { return getRefCount() == 0; } /** * If hasHardReferences() == TRUE then this object has hard references. * Must be called only from within the internals of UnifiedCache. */ - inline UBool hasHardReferences() const { return getHardRefCount() != 0; } - - /** - * If noSoftReferences() == TRUE then this object has no soft references. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. 
- */ - UBool noSoftReferences() const { return (softRefCount == 0); } + inline UBool hasHardReferences() const { return getRefCount() != 0; } /** - * Deletes this object if it has no references or soft references. + * Deletes this object if it has no references. + * Available for non-cached SharedObjects only. Ownership of cached objects + * is with the UnifiedCache, which is soley responsible for eviction and deletion. */ void deleteIfZeroRefCount() const; - /** - * @internal For UnifedCache use only to register this object with itself. - * Must be called before this object is exposed to multiple threads. - */ - void registerWithCache(const UnifiedCacheBase *ptr) const { - cachePtr = ptr; - } /** * Returns a writable version of ptr. @@ -219,15 +161,21 @@ public: } private: - mutable u_atomic_int32_t totalRefCount; - - // Any thread modifying softRefCount must hold the global cache mutex + /** + * The number of references from the UnifiedCache, which is + * the number of times that the sharedObject is stored as a hash table value. + * For use by UnifiedCache implementation code only. + * All access is synchronized by UnifiedCache's gCacheMutex + */ mutable int32_t softRefCount; + friend class UnifiedCache; + /** + * Reference count, excluding references from within the UnifiedCache implementation. 
+ */ mutable u_atomic_int32_t hardRefCount; + mutable const UnifiedCacheBase *cachePtr; - void addRef(UBool withCacheLock) const; - void removeRef(UBool withCacheLock) const; }; diff --git a/deps/icu-small/source/common/sprpimpl.h b/deps/icu-small/source/common/sprpimpl.h index aff40ad0da..26de904b1f 100644 --- a/deps/icu-small/source/common/sprpimpl.h +++ b/deps/icu-small/source/common/sprpimpl.h @@ -90,7 +90,6 @@ struct UStringPrepProfile{ UTrie sprepTrie; const uint16_t* mappingData; UDataMemory* sprepData; - const UBiDiProps *bdp; /* used only if checkBiDi is set */ int32_t refCount; UBool isDataLoaded; UBool doNFKC; diff --git a/deps/icu-small/source/common/ubidi.cpp b/deps/icu-small/source/common/ubidi.cpp index 8e2fc36e5f..531ed64cff 100644 --- a/deps/icu-small/source/common/ubidi.cpp +++ b/deps/icu-small/source/common/ubidi.cpp @@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ uprv_memset(pBiDi, 0, sizeof(UBiDi)); - /* get BiDi properties */ - pBiDi->bdp=ubidi_getSingleton(); - /* allocate memory for arrays as requested */ if(maxLength>0) { if( !getInitialDirPropsMemory(pBiDi, maxLength) || @@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) { else match=0; if(match!=c && /* has a matching char */ - ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */ + ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */ /* special case: process synonyms create an opening entry for each synonym */ if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ @@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) if( pBiDi->fnClassCallback == NULL || (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) { - dir = ubidi_getClass(pBiDi->bdp, c); + dir = ubidi_getClass(c); } if(dir >= U_CHAR_DIRECTION_COUNT) { dir = (UCharDirection)ON; diff --git 
a/deps/icu-small/source/common/ubidi_props.cpp b/deps/icu-small/source/common/ubidi_props.cpp index dcfb52c897..4141c21938 100644 --- a/deps/icu-small/source/common/ubidi_props.cpp +++ b/deps/icu-small/source/common/ubidi_props.cpp @@ -44,13 +44,6 @@ struct UBiDiProps { #define INCLUDED_FROM_UBIDI_PROPS_C #include "ubidi_props_data.h" -/* UBiDiProps singleton ----------------------------------------------------- */ - -U_CFUNC const UBiDiProps * -ubidi_getSingleton() { - return &ubidi_props_singleton; -} - /* set of property starts for UnicodeSet ------------------------------------ */ static UBool U_CALLCONV @@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32 } U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) { +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { int32_t i, length; UChar32 c, start, limit; @@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * } /* add the start code point of each same-value range of the trie */ - utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa); + utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); /* add the code points from the bidi mirroring table */ - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; for(i=0; i<length; ++i) { - c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]); + c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]); sa->addRange(sa->set, c, c+1); } /* add the code points from the Joining_Group array where the value changes */ - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; - jgArray=bdp->jgArray; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; + jgArray=ubidi_props_singleton.jgArray; for(;;) { prev=0; while(start<limit) { @@ -103,11 +96,11 @@ 
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * /* add the limit code point if the last value was not 0 (it is now start==limit) */ sa->add(sa->set, limit); } - if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) { + if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) { /* switch to the second Joining_Group range */ - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; - jgArray=bdp->jgArray2; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; + jgArray=ubidi_props_singleton.jgArray2; } else { break; } @@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * /* property access functions ------------------------------------------------ */ U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { - int32_t max; - - if(bdp==NULL) { - return -1; - } - - max=bdp->indexes[UBIDI_MAX_VALUES_INDEX]; +ubidi_getMaxValue(UProperty which) { + int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX]; switch(which) { case UCHAR_BIDI_CLASS: return (max&UBIDI_CLASS_MASK); @@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { } U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getClass(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UCharDirection)UBIDI_GET_CLASS(props); } U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isMirrored(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); } static UChar32 -getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { +getMirror(UChar32 c, uint16_t props) { int32_t delta=UBIDI_GET_MIRROR_DELTA(props); if(delta!=UBIDI_ESC_MIRROR_DELTA) { return 
c+delta; @@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { int32_t i, length; UChar32 c2; - mirrors=bdp->mirrors; - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + mirrors=ubidi_props_singleton.mirrors; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; /* linear search */ for(i=0; i<length; ++i) { @@ -188,59 +175,59 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { } U_CFUNC UChar32 -ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return getMirror(bdp, c, props); +ubidi_getMirror(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return getMirror(c, props); } U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isBidiControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); } U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isJoinControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); } U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getJoiningType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); } U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) { +ubidi_getJoiningGroup(UChar32 c) { UChar32 start, limit; - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; if(start<=c && c<limit) { - return (UJoiningGroup)bdp->jgArray[c-start]; + return 
(UJoiningGroup)ubidi_props_singleton.jgArray[c-start]; } - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; if(start<=c && c<limit) { - return (UJoiningGroup)bdp->jgArray2[c-start]; + return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start]; } return U_JG_NO_JOINING_GROUP; } U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getPairedBracketType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); } U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getPairedBracket(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); if((props&UBIDI_BPT_MASK)==0) { return c; } else { - return getMirror(bdp, c, props); + return getMirror(c, props); } } @@ -248,20 +235,20 @@ ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { U_CFUNC UCharDirection u_charDirection(UChar32 c) { - return ubidi_getClass(&ubidi_props_singleton, c); + return ubidi_getClass(c); } U_CFUNC UBool u_isMirrored(UChar32 c) { - return ubidi_isMirrored(&ubidi_props_singleton, c); + return ubidi_isMirrored(c); } U_CFUNC UChar32 u_charMirror(UChar32 c) { - return ubidi_getMirror(&ubidi_props_singleton, c); + return ubidi_getMirror(c); } U_STABLE UChar32 U_EXPORT2 u_getBidiPairedBracket(UChar32 c) { - return ubidi_getPairedBracket(&ubidi_props_singleton, c); + return ubidi_getPairedBracket(c); } diff --git a/deps/icu-small/source/common/ubidi_props.h b/deps/icu-small/source/common/ubidi_props.h index 69e8853e69..698ee9c52b 100644 --- a/deps/icu-small/source/common/ubidi_props.h +++ b/deps/icu-small/source/common/ubidi_props.h @@ -31,46 +31,40 @@ U_CDECL_BEGIN /* 
library API -------------------------------------------------------------- */ -struct UBiDiProps; -typedef struct UBiDiProps UBiDiProps; - -U_CFUNC const UBiDiProps * -ubidi_getSingleton(void); - U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode); +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); /* property access functions */ U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which); +ubidi_getMaxValue(UProperty which); U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c); +ubidi_getClass(UChar32 c); U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c); +ubidi_isMirrored(UChar32 c); U_CFUNC UChar32 -ubidi_getMirror(const UBiDiProps *bdp, UChar32 c); +ubidi_getMirror(UChar32 c); U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c); +ubidi_isBidiControl(UChar32 c); U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c); +ubidi_isJoinControl(UChar32 c); U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c); +ubidi_getJoiningType(UChar32 c); U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c); +ubidi_getJoiningGroup(UChar32 c); U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c); +ubidi_getPairedBracketType(UChar32 c); U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c); +ubidi_getPairedBracket(UChar32 c); /* file definitions --------------------------------------------------------- */ diff --git a/deps/icu-small/source/common/ubidiimp.h b/deps/icu-small/source/common/ubidiimp.h index fd64fac34d..a5d0727495 100644 --- a/deps/icu-small/source/common/ubidiimp.h +++ b/deps/icu-small/source/common/ubidiimp.h @@ -254,8 +254,6 @@ struct UBiDi { */ const UBiDi * pParaBiDi; - const UBiDiProps *bdp; - /* alias pointer to the current text */ const UChar *text; diff --git a/deps/icu-small/source/common/ucase.cpp 
b/deps/icu-small/source/common/ucase.cpp index 1f41dbf6de..28d5a4cac6 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { /* data access primitives --------------------------------------------------- */ -#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie() { + return &ucase_props_singleton.trie; +} -#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) +#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) /* number of bits in an 8-bit integer value */ static const uint8_t flagsOffset[256]={ @@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={ U_CAPI UChar32 U_EXPORT2 ucase_tolower(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { c+=UCASE_GET_DELTA(props); } } else { @@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) { U_CAPI UChar32 U_EXPORT2 ucase_toupper(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } @@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) { U_CAPI UChar32 U_EXPORT2 ucase_totitle(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } @@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { } props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) { /* add the one simple case mapping, no 
matter what type it is */ int32_t delta=UCASE_GET_DELTA(props); @@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) { return c; } +namespace LatinCase { + +const int8_t TO_LOWER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_LOWER_TR_LT[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_UPPER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 
0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +const int8_t TO_UPPER_TR[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +} // namespace LatinCase + U_NAMESPACE_END /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ @@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) { static inline int32_t getDotType(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - 
if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { return props&UCASE_DOT_MASK; } else { const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); @@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { result=c+UCASE_GET_DELTA(props); } } else { @@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { result=c+UCASE_GET_DELTA(props); } @@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c, U_CAPI UChar32 U_EXPORT2 ucase_fold(UChar32 c, uint32_t options) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { c+=UCASE_GET_DELTA(props); } } else { @@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { result=c+UCASE_GET_DELTA(props); } } else { diff --git a/deps/icu-small/source/common/ucase.h b/deps/icu-small/source/common/ucase.h index 9d6365eadf..a7a8c9f00d 100644 --- a/deps/icu-small/source/common/ucase.h +++ b/deps/icu-small/source/common/ucase.h @@ -26,6 +26,7 @@ #include "putilimp.h" #include "uset_imp.h" #include "udataswp.h" +#include "utrie2.h" #ifdef __cplusplus U_NAMESPACE_BEGIN @@ -148,6 +149,33 @@ private: int32_t rowCpIndex; }; +/** + * Fast case mapping data for ASCII/Latin. 
+ * Linear arrays of delta bytes: 0=no mapping; EXC=exception. + * Deltas must not cross the ASCII boundary, or else they cannot be easily used + * in simple UTF-8 code. + */ +namespace LatinCase { + +/** Case mapping/folding data for code points up to U+017F. */ +constexpr UChar LIMIT = 0x180; +/** U+017F case-folds and uppercases crossing the ASCII boundary. */ +constexpr UChar LONG_S = 0x17f; +/** Exception: Complex mapping, or too-large delta. */ +constexpr int8_t EXC = -0x80; + +/** Deltas for lowercasing for most locales, and default case folding. */ +extern const int8_t TO_LOWER_NORMAL[LIMIT]; +/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */ +extern const int8_t TO_LOWER_TR_LT[LIMIT]; + +/** Deltas for uppercasing for most locales. */ +extern const int8_t TO_UPPER_NORMAL[LIMIT]; +/** Deltas for uppercasing for tr/az. */ +extern const int8_t TO_UPPER_TR[LIMIT]; + +} // namespace LatinCase + U_NAMESPACE_END #endif @@ -308,6 +336,9 @@ enum { /* definitions for 16-bit case properties word ------------------------------ */ +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie(); + /* 2-bit constants for types of cased characters */ #define UCASE_TYPE_MASK 3 enum { @@ -320,10 +351,14 @@ enum { #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) #define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) +#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2) + #define UCASE_IGNORABLE 4 #define UCASE_SENSITIVE 8 #define UCASE_EXCEPTION 0x10 +#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) + #define UCASE_DOT_MASK 0x60 enum { UCASE_NO_DOT=0, /* normal characters with cc=0 */ diff --git a/deps/icu-small/source/common/ucasemap.cpp b/deps/icu-small/source/common/ucasemap.cpp index 8eec93c6e3..99e30c9fc6 100644 --- a/deps/icu-small/source/common/ucasemap.cpp +++ b/deps/icu-small/source/common/ucasemap.cpp @@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s, inline uint8_t getTwoByteLead(UChar32 c) { return 
(uint8_t)((c >> 6) | 0xc0); } inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } -} // namespace - -static UChar32 U_CALLCONV +UChar32 U_CALLCONV utf8_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; UChar32 c; @@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) { return U_SENTINEL; } -/* - * Case-maps [srcStart..srcLimit[ but takes - * context [0..srcLength[ into account. +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. */ -static void -_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, - const uint8_t *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex=srcStart; - while (U_SUCCESS(errorCode) && srcIndex<srcLimit) { +void toLower(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases int32_t cpStart; - csc->cpStart=cpStart=srcIndex; UChar32 c; - U8_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; - if(c<0) { - // Malformed UTF-8. 
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLimit) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToLower[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLimit && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLimit, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + const UChar *s; + if (caseLocale >= 
0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale); } else { - const UChar *s; - c=map(c, utf8_caseContextIterator, csc, &s, caseLocale); + c = ucase_toFullFolding(c, &s, options); + } + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; } } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); } +void toUpper(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { + latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + int32_t cpStart; + UChar32 c; + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLength) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToUpper[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src 
+ prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLength && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLength, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; + } + } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); +} + +} // namespace + #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC void U_CALLCONV @@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle( if(titleLimit<index) { if((options&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. 
*/ - _caseMap(caseLocale, options, ucase_toFullLower, - src, &csc, - titleLimit, index, - sink, edits, errorCode); + toLower(caseLocale, options, + src, &csc, titleLimit, index, + sink, edits, errorCode); if(U_FAILURE(errorCode)) { return; } @@ -538,8 +726,8 @@ ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREA UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - _caseMap( - caseLocale, options, ucase_toFullLower, + toLower( + caseLocale, options, src, &csc, 0, srcLength, sink, edits, errorCode); } @@ -555,9 +743,9 @@ ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREA UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - _caseMap( - caseLocale, options, ucase_toFullUpper, - src, &csc, 0, srcLength, + toUpper( + caseLocale, options, + src, &csc, srcLength, sink, edits, errorCode); } } @@ -567,22 +755,10 @@ ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_B const uint8_t *src, int32_t srcLength, icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex = 0; - while (U_SUCCESS(errorCode) && srcIndex < srcLength) { - int32_t cpStart = srcIndex; - UChar32 c; - U8_NEXT(src, srcIndex, srcLength, c); - if(c<0) { - // Malformed UTF-8. 
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, - sink, options, edits, errorCode); - } else { - const UChar *s; - c = ucase_toFullFolding(c, &s, options); - appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); - } - } + toLower( + -1, options, + src, nullptr, 0, srcLength, + sink, edits, errorCode); } void diff --git a/deps/icu-small/source/common/ucasemap_imp.h b/deps/icu-small/source/common/ucasemap_imp.h index 99a6490279..7788fd9371 100644 --- a/deps/icu-small/source/common/ucasemap_imp.h +++ b/deps/icu-small/source/common/ucasemap_imp.h @@ -60,15 +60,6 @@ u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, int32_t *matchLen1, int32_t *matchLen2, UErrorCode *pErrorCode); -/** - * Are the Unicode properties loaded? - * This must be used before internal functions are called that do - * not perform this check. - * Generate a debug assertion failure if data is not loaded. - */ -U_CFUNC UBool -uprv_haveProperties(UErrorCode *pErrorCode); - #ifdef __cplusplus U_NAMESPACE_BEGIN diff --git a/deps/icu-small/source/common/uchar.cpp b/deps/icu-small/source/common/uchar.cpp index c3f037d73e..996c3fdc40 100644 --- a/deps/icu-small/source/common/uchar.cpp +++ b/deps/icu-small/source/common/uchar.cpp @@ -42,14 +42,6 @@ /* getting a uint32_t properties word from the data */ #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); -U_CFUNC UBool -uprv_haveProperties(UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return FALSE; - } - return TRUE; -} - /* API functions ------------------------------------------------------------ */ /* Gets the Unicode character's general category.*/ diff --git a/deps/icu-small/source/common/ucmndata.cpp b/deps/icu-small/source/common/ucmndata.cpp index 251c7ba182..ba2310bb7a 100644 --- a/deps/icu-small/source/common/ucmndata.cpp +++ b/deps/icu-small/source/common/ucmndata.cpp @@ -77,7 +77,11 @@ typedef struct { typedef struct { uint32_t count; uint32_t reserved; - 
PointerTOCEntry entry[2]; /* Actual size is from count. */ + /** + * Variable-length array declared with length 1 to disable bounds checkers. + * The actual array length is in the count field. + */ + PointerTOCEntry entry[1]; } PointerTOC; diff --git a/deps/icu-small/source/common/ucmndata.h b/deps/icu-small/source/common/ucmndata.h index 8c36897f16..1684441432 100644 --- a/deps/icu-small/source/common/ucmndata.h +++ b/deps/icu-small/source/common/ucmndata.h @@ -52,7 +52,11 @@ typedef struct { typedef struct { uint32_t count; - UDataOffsetTOCEntry entry[2]; /* Actual size of array is from count. */ + /** + * Variable-length array declared with length 1 to disable bounds checkers. + * The actual array length is in the count field. + */ + UDataOffsetTOCEntry entry[1]; } UDataOffsetTOC; /** diff --git a/deps/icu-small/source/common/ucnv2022.cpp b/deps/icu-small/source/common/ucnv2022.cpp index 1b625ea06c..854ca60cc3 100644 --- a/deps/icu-small/source/common/ucnv2022.cpp +++ b/deps/icu-small/source/common/ucnv2022.cpp @@ -3512,14 +3512,14 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC case 'k': if(myConverterData->version == 0) { if(length == 1) { - if((UBool)args->converter->fromUnicodeStatus) { + if(args->converter->fromUnicodeStatus) { /* in DBCS mode: switch to SBCS */ args->converter->fromUnicodeStatus = 0; *p++ = UCNV_SI; } *p++ = subchar[0]; } else /* length == 2*/ { - if(!(UBool)args->converter->fromUnicodeStatus) { + if(!args->converter->fromUnicodeStatus) { /* in SBCS mode: switch to DBCS */ args->converter->fromUnicodeStatus = 1; *p++ = UCNV_SO; diff --git a/deps/icu-small/source/common/ucnv_err.cpp b/deps/icu-small/source/common/ucnv_err.cpp index 18218835a2..63794d2334 100644 --- a/deps/icu-small/source/common/ucnv_err.cpp +++ b/deps/icu-small/source/common/ucnv_err.cpp @@ -60,11 +60,12 @@ * To avoid dependency on other code, this list is hard coded here. 
* When an ignorable code point is found and is unmappable, the default callbacks * will ignore them. - * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= + * For a list of the default ignorable code points, use this link: + * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i= * * This list should be sync with the one in CharsetCallback.java */ -#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ +#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \ (c == 0x00AD) || \ (c == 0x034F) || \ (c == 0x061C) || \ @@ -74,26 +75,15 @@ (0x180B <= c && c <= 0x180E) || \ (0x200B <= c && c <= 0x200F) || \ (0x202A <= c && c <= 0x202E) || \ - (c == 0x2060) || \ - (0x2066 <= c && c <= 0x2069) || \ - (0x2061 <= c && c <= 0x2064) || \ - (0x206A <= c && c <= 0x206F) || \ + (0x2060 <= c && c <= 0x206F) || \ (c == 0x3164) || \ - (0x0FE00 <= c && c <= 0x0FE0F) || \ - (c == 0x0FEFF) || \ - (c == 0x0FFA0) || \ - (0x01BCA0 <= c && c <= 0x01BCA3) || \ - (0x01D173 <= c && c <= 0x01D17A) || \ - (c == 0x0E0001) || \ - (0x0E0020 <= c && c <= 0x0E007F) || \ - (0x0E0100 <= c && c <= 0x0E01EF) || \ - (c == 0x2065) || \ - (0x0FFF0 <= c && c <= 0x0FFF8) || \ - (c == 0x0E0000) || \ - (0x0E0002 <= c && c <= 0x0E001F) || \ - (0x0E0080 <= c && c <= 0x0E00FF) || \ - (0x0E01F0 <= c && c <= 0x0E0FFF) \ - ) + (0xFE00 <= c && c <= 0xFE0F) || \ + (c == 0xFEFF) || \ + (c == 0xFFA0) || \ + (0xFFF0 <= c && c <= 0xFFF8) || \ + (0x1BCA0 <= c && c <= 0x1BCA3) || \ + (0x1D173 <= c && c <= 0x1D17A) || \ + (0xE0000 <= c && c <= 0xE0FFF)) /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ diff --git a/deps/icu-small/source/common/ucnv_u32.cpp b/deps/icu-small/source/common/ucnv_u32.cpp index 3fac04b300..ca8c6788d3 100644 --- a/deps/icu-small/source/common/ucnv_u32.cpp +++ b/deps/icu-small/source/common/ucnv_u32.cpp @@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, uint32_t ch, i; /* 
Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, int32_t offsetNum = 0; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, uint32_t ch, i; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, int32_t offsetNum = 0; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; diff --git a/deps/icu-small/source/common/ucnv_u8.cpp b/deps/icu-small/source/common/ucnv_u8.cpp index c7ef87fd50..5a07244b02 100644 --- a/deps/icu-small/source/common/ucnv_u8.cpp +++ b/deps/icu-small/source/common/ucnv_u8.cpp @@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, int32_t i, inBytes; /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) + if (cnv->toULength > 0 && myTarget < targetLimit) { inBytes = cnv->mode; /* restore # of bytes to consume */ i = cnv->toULength; /* restore # 
of bytes consumed */ @@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr int32_t i, inBytes; /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) + if (cnv->toULength > 0 && myTarget < targetLimit) { inBytes = cnv->mode; /* restore # of bytes to consume */ i = cnv->toULength; /* restore # of bytes consumed */ @@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } count=(int32_t)(sourceLimit-source)+oldToULength; @@ -695,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, // Use a single counter for source and target, counting the minimum of // the source length and the target capacity. // Let the standard converter handle edge cases. - const uint8_t *limit=sourceLimit; if(count>targetCapacity) { - limit-=(count-targetCapacity); count=targetCapacity; } - // The conversion loop checks count>0 only once per 1/2/3-byte character. - // If the buffer ends with a truncated 2- or 3-byte sequence, + // The conversion loop checks count>0 only once per character. + // If the buffer ends with a truncated sequence, // then we reduce the count to stop before that, // and collect the remaining bytes after the conversion loop. - { - // Do not go back into the bytes that will be read for finishing a partial - // sequence from the previous buffer. 
- int32_t length=count-toULimit; - if(length>0) { - uint8_t b1=*(limit-1); - if(U8_IS_SINGLE(b1)) { - // common ASCII character - } else if(U8_IS_TRAIL(b1) && length>=2) { - uint8_t b2=*(limit-2); - if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - // truncated 3-byte sequence - count-=2; - } - } else if(0xc2<=b1 && b1<0xf0) { - // truncated 2- or 3-byte sequence - --count; - } - } - } + + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=count-toULimit; + U8_TRUNCATE_IF_INCOMPLETE(source, 0, length); + count=toULimit+length; } if(c!=0) { @@ -814,7 +799,7 @@ moreBytes: } /* copy the legal byte sequence to the target */ - if(count>=toULength) { + { int8_t i; for(i=0; i<oldToULength; ++i) { @@ -825,14 +810,6 @@ moreBytes: *target++=*source++; } count-=toULength; - } else { - // A supplementary character that does not fit into the target. - // Let the standard converter handle this. - source-=(toULength-oldToULength); - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_USING_DEFAULT_WARNING; - return; } } } @@ -856,8 +833,7 @@ moreBytes: utf8->toULength=toULength; utf8->mode=toULimit; break; - } else if(!U8_IS_TRAIL(b=*source)) { - /* lead byte in trail byte position */ + } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) { utf8->toULength=toULength; *pErrorCode=U_ILLEGAL_CHAR_FOUND; break; diff --git a/deps/icu-small/source/common/ucnvlat1.cpp b/deps/icu-small/source/common/ucnvlat1.cpp index 9855ebe6e7..15eeb5c51f 100644 --- a/deps/icu-small/source/common/ucnvlat1.cpp +++ b/deps/icu-small/source/common/ucnvlat1.cpp @@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; + if (utf8->toULength > 0) { + c=(UChar32)utf8->toUnicodeStatus; 
+ } else { + c = 0; + } if(c!=0 && source<sourceLimit) { if(targetCapacity==0) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; @@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, uint8_t c; - if(pToUArgs->converter->toUnicodeStatus!=0) { + if(pToUArgs->converter->toULength > 0) { /* no handling of partial UTF-8 characters here, fall back to pivoting */ *pErrorCode=U_USING_DEFAULT_WARNING; return; diff --git a/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/icu-small/source/common/ucnvmbcs.cpp index e5efa7fc1b..9052394b4f 100644 --- a/deps/icu-small/source/common/ucnvmbcs.cpp +++ b/deps/icu-small/source/common/ucnvmbcs.cpp @@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character. @@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character. 
diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index aa9d855f50..d1e5f62a9a 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -17,6 +17,7 @@ #include "unicode/ustring.h" #include "unicode/parsepos.h" #include "ustr_imp.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "uassert.h" @@ -28,9 +29,12 @@ #include "uinvchar.h" #include "uresimp.h" #include "ulist.h" +#include "uresimp.h" #include "ureslocs.h" #include "ulocimp.h" +using namespace icu; + //#define UCURR_DEBUG_EQUIV 1 #ifdef UCURR_DEBUG_EQUIV #include "stdio.h" @@ -104,6 +108,7 @@ static const char VAR_DELIM_STR[] = "_"; // Tag for localized display names (symbols) of currencies static const char CURRENCIES[] = "Currencies"; +static const char CURRENCIES_NARROW[] = "Currencies%narrow"; static const char CURRENCYPLURALS[] = "CurrencyPlurals"; static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0}; @@ -698,7 +703,7 @@ ucurr_getName(const UChar* currency, } int32_t choice = (int32_t) nameStyle; - if (choice < 0 || choice > 1) { + if (choice < 0 || choice > 2) { *ec = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -731,15 +736,19 @@ ucurr_getName(const UChar* currency, const UChar* s = NULL; ec2 = U_ZERO_ERROR; - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); - - rb = ures_getByKey(rb, CURRENCIES, rb, &ec2); - - // Fetch resource with multi-level resource inheritance fallback - rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2); - - s = ures_getStringByIndex(rb, choice, len, &ec2); - ures_close(rb); + LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2)); + + if (nameStyle == UCURR_NARROW_SYMBOL_NAME) { + CharString key; + key.append(CURRENCIES_NARROW, ec2); + key.append("/", ec2); + key.append(buf, ec2); + s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2); + } else { + ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2); + 
ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2); + s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2); + } // If we've succeeded we're done. Otherwise, try to fallback. // If that fails (because we are already at root) then exit. diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h index c64bb71222..607f3ec625 100644 --- a/deps/icu-small/source/common/unicode/brkiter.h +++ b/deps/icu-small/source/common/unicode/brkiter.h @@ -298,15 +298,14 @@ public: virtual int32_t next(int32_t n) = 0; /** - * For RuleBasedBreakIterators, return the status tag from the - * break rule that determined the most recently - * returned break position. + * For RuleBasedBreakIterators, return the status tag from the break rule + * that determined the boundary at the current iteration position. * <p> * For break iterator types that do not support a rule status, * a default value of 0 is returned. * <p> - * @return the status from the break rule that determined the most recently - * returned break position. + * @return the status from the break rule that determined the boundary at + * the current iteration position. * @see RuleBaseBreakIterator::getRuleStatus() * @see UWordBreak * @stable ICU 52 @@ -315,7 +314,7 @@ public: /** * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) - * that determined the most recently returned break position. + * that determined the boundary at the current iteration position. * <p> * For break iterator types that do not support rule status, * no values are returned. @@ -334,7 +333,7 @@ public: * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. + * the boundary at the current iteration position. 
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. @@ -616,7 +615,7 @@ public: virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; private: - static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status); + static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h index 0f9f5e2c06..7a806bb7f0 100644 --- a/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h @@ -154,7 +154,6 @@ private: const char *s; }; - // don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods. virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, Node *nextNode) const; diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h index 4a4917bdca..4b77256d74 100644 --- a/deps/icu-small/source/common/unicode/casemap.h +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -18,8 +18,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - class BreakIterator; class ByteSink; class Edits; @@ -27,7 +25,7 @@ class Edits; /** * Low-level C++ case mapping functions. * - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API CaseMap U_FINAL : public UMemory { public: @@ -59,7 +57,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
* * @see u_strToLower - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toLower( const char *locale, uint32_t options, @@ -95,7 +93,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToUpper - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toUpper( const char *locale, uint32_t options, @@ -146,7 +144,7 @@ public: * * @see u_strToTitle * @see ucasemap_toTitle - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -188,7 +186,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strFoldCase - * @draft ICU 59 + * @stable ICU 59 */ static int32_t fold( uint32_t options, @@ -196,6 +194,7 @@ public: char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); +#ifndef U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. @@ -318,6 +317,7 @@ public: uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. @@ -347,7 +347,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToLower - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToLower( const char *locale, uint32_t options, @@ -383,7 +383,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToUpper - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToUpper( const char *locale, uint32_t options, @@ -433,7 +433,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
* * @see ucasemap_utf8ToTitle - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -475,7 +475,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8FoldCase - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8Fold( uint32_t options, @@ -489,8 +489,6 @@ private: CaseMap &operator=(const CaseMap &other) = delete; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif // __CASEMAP_H__ diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h index fbce177591..49d0e029a9 100644 --- a/deps/icu-small/source/common/unicode/char16ptr.h +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -30,25 +30,23 @@ U_NAMESPACE_BEGIN # define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") #endif -// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it -// is now used in place of UChar* in several stable C++ methods /** * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API Char16Ptr U_FINAL { public: /** * Copies the pointer. * @param p pointer - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(char16_t *p); #if !U_CHAR16_IS_TYPEDEF /** * Converts the pointer to char16_t *. * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(uint16_t *p); #endif @@ -57,32 +55,32 @@ public: * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(wchar_t *p); #endif /** * nullptr constructor. * @param p nullptr - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(std::nullptr_t p); /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ inline ~Char16Ptr(); /** * Pointer access. 
* @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline operator char16_t *() const { return get(); } @@ -137,25 +135,23 @@ char16_t *Char16Ptr::get() const { return u_.cp; } #endif -// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is -// now used in place of const UChar* in several stable C++ methods /** * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API ConstChar16Ptr U_FINAL { public: /** * Copies the pointer. * @param p pointer - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const char16_t *p); #if !U_CHAR16_IS_TYPEDEF /** * Converts the pointer to char16_t *. * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const uint16_t *p); #endif @@ -164,33 +160,33 @@ public: * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const wchar_t *p); #endif /** * nullptr constructor. * @param p nullptr - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const std::nullptr_t p); /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ inline ~ConstChar16Ptr(); /** * Pointer access. * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline const char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline operator const char16_t *() const { return get(); } @@ -250,7 +246,7 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; } * Includes an aliasing barrier if available. 
* @param p pointer * @return p as const UChar * - * @draft ICU 59 + * @stable ICU 59 */ inline const UChar *toUCharPtr(const char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -264,7 +260,7 @@ inline const UChar *toUCharPtr(const char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as UChar * - * @draft ICU 59 + * @stable ICU 59 */ inline UChar *toUCharPtr(char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -278,7 +274,7 @@ inline UChar *toUCharPtr(char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as const OldUChar * - * @draft ICU 59 + * @stable ICU 59 */ inline const OldUChar *toOldUCharPtr(const char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -292,7 +288,7 @@ inline const OldUChar *toOldUCharPtr(const char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as OldUChar * - * @draft ICU 59 + * @stable ICU 59 */ inline OldUChar *toOldUCharPtr(char16_t *p) { #ifdef U_ALIASING_BARRIER diff --git a/deps/icu-small/source/common/unicode/chariter.h b/deps/icu-small/source/common/unicode/chariter.h index dbed89dbe6..292794f6d6 100644 --- a/deps/icu-small/source/common/unicode/chariter.h +++ b/deps/icu-small/source/common/unicode/chariter.h @@ -569,7 +569,7 @@ public: * Returns the numeric index in the underlying text-storage * object of the character the iterator currently refers to * (i.e., the character returned by current()). - * @return the numberic index in the text-storage object of + * @return the numeric index in the text-storage object of * the character the iterator currently refers to * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/common/unicode/dtintrv.h b/deps/icu-small/source/common/unicode/dtintrv.h index 2221b36c9b..c99011e26c 100644 --- a/deps/icu-small/source/common/unicode/dtintrv.h +++ b/deps/icu-small/source/common/unicode/dtintrv.h @@ -69,7 +69,7 @@ public: * <pre> * . Base* polymorphic_pointer = createPolymorphicObject(); * . 
if (polymorphic_pointer->getDynamicClassID() == - * . erived::getStaticClassID()) ... + * . derived::getStaticClassID()) ... * </pre> * @return The class ID for all objects of this class. * @stable ICU 4.0 diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h index 082c3733a8..5a72574c14 100644 --- a/deps/icu-small/source/common/unicode/edits.h +++ b/deps/icu-small/source/common/unicode/edits.h @@ -17,8 +17,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - /** * Records lengths of string edits but not replacement text. * Supports replacements, insertions, deletions in linear progression. @@ -27,13 +25,13 @@ U_NAMESPACE_BEGIN * An Edits object tracks a separate UErrorCode, but ICU string transformation functions * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. * - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API Edits U_FINAL : public UMemory { public: /** * Constructs an empty object. - * @draft ICU 59 + * @stable ICU 59 */ Edits() : array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), @@ -64,7 +62,7 @@ public: /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ ~Edits(); @@ -88,20 +86,20 @@ public: /** * Resets the data but may not release memory. - * @draft ICU 59 + * @stable ICU 59 */ void reset() U_NOEXCEPT; /** * Adds a record for an unchanged segment of text. * Normally called from inside ICU string transformation functions, not user code. - * @draft ICU 59 + * @stable ICU 59 */ void addUnchanged(int32_t unchangedLength); /** * Adds a record for a text replacement/insertion/deletion. * Normally called from inside ICU string transformation functions, not user code. - * @draft ICU 59 + * @stable ICU 59 */ void addReplace(int32_t oldLength, int32_t newLength); /** @@ -112,33 +110,35 @@ public: * and an error occurred while recording edits. * Otherwise unchanged. 
* @return TRUE if U_FAILURE(outErrorCode) - * @draft ICU 59 + * @stable ICU 59 */ UBool copyErrorTo(UErrorCode &outErrorCode); /** * How much longer is the new text compared with the old text? * @return new length minus old length - * @draft ICU 59 + * @stable ICU 59 */ int32_t lengthDelta() const { return delta; } /** * @return TRUE if there are any change edits - * @draft ICU 59 + * @stable ICU 59 */ UBool hasChanges() const { return numChanges != 0; } +#ifndef U_HIDE_DRAFT_API /** * @return the number of change edits * @draft ICU 60 */ int32_t numberOfChanges() const { return numChanges; } +#endif // U_HIDE_DRAFT_API /** * Access to the list of edits. * @see getCoarseIterator * @see getFineIterator - * @draft ICU 59 + * @stable ICU 59 */ struct U_COMMON_API Iterator U_FINAL : public UMemory { /** @@ -152,12 +152,12 @@ public: srcIndex(0), replIndex(0), destIndex(0) {} /** * Copy constructor. - * @draft ICU 59 + * @stable ICU 59 */ Iterator(const Iterator &other) = default; /** * Assignment operator. - * @draft ICU 59 + * @stable ICU 59 */ Iterator &operator=(const Iterator &other) = default; @@ -167,7 +167,7 @@ public: * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return TRUE if there is another edit - * @draft ICU 59 + * @stable ICU 59 */ UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } @@ -188,12 +188,13 @@ public: * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return TRUE if the edit for the source index was found - * @draft ICU 59 + * @stable ICU 59 */ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { return findIndex(i, TRUE, errorCode) == 0; } +#ifndef U_HIDE_DRAFT_API /** * Finds the edit that contains the destination index. 
* The destination index may be found in a non-change @@ -264,39 +265,40 @@ public: * @draft ICU 60 */ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API /** * @return TRUE if this edit replaces oldLength() units with newLength() different ones. * FALSE if oldLength units remain unchanged. - * @draft ICU 59 + * @stable ICU 59 */ UBool hasChange() const { return changed; } /** * @return the number of units in the original string which are replaced or remain unchanged. - * @draft ICU 59 + * @stable ICU 59 */ int32_t oldLength() const { return oldLength_; } /** * @return the number of units in the modified string, if hasChange() is TRUE. * Same as oldLength if hasChange() is FALSE. - * @draft ICU 59 + * @stable ICU 59 */ int32_t newLength() const { return newLength_; } /** * @return the current index into the source string - * @draft ICU 59 + * @stable ICU 59 */ int32_t sourceIndex() const { return srcIndex; } /** * @return the current index into the replacement-characters-only string, * not counting unchanged spans - * @draft ICU 59 + * @stable ICU 59 */ int32_t replacementIndex() const { return replIndex; } /** * @return the current index into the full destination string - * @draft ICU 59 + * @stable ICU 59 */ int32_t destinationIndex() const { return destIndex; } @@ -331,7 +333,7 @@ public: * Returns an Iterator for coarse-grained changes for simple string updates. * Skips non-changes. * @return an Iterator that merges adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getCoarseChangesIterator() const { return Iterator(array, length, TRUE, TRUE); @@ -340,7 +342,7 @@ public: /** * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. * @return an Iterator that merges adjacent changes. 
- * @draft ICU 59 + * @stable ICU 59 */ Iterator getCoarseIterator() const { return Iterator(array, length, FALSE, TRUE); @@ -350,7 +352,7 @@ public: * Returns an Iterator for fine-grained changes for modifying styled text. * Skips non-changes. * @return an Iterator that separates adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getFineChangesIterator() const { return Iterator(array, length, TRUE, FALSE); @@ -359,12 +361,13 @@ public: /** * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. * @return an Iterator that separates adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getFineIterator() const { return Iterator(array, length, FALSE, FALSE); } +#ifndef U_HIDE_DRAFT_API /** * Merges the two input Edits and appends the result to this object. * @@ -393,6 +396,7 @@ public: * @draft ICU 60 */ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API private: void releaseArray() U_NOEXCEPT; @@ -415,8 +419,6 @@ private: uint16_t stackArray[STACK_CAPACITY]; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif // __EDITS_H__ diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index a0319bf0a7..751d1faf40 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -64,9 +64,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @deprecated ICU 60 use createEmptyInstance instead * @see createEmptyInstance() */ - static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) { - return createEmptyInstance(status); - } + static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); #endif /* U_HIDE_DEPRECATED_API */ #ifndef U_HIDE_DRAFT_API @@ -105,7 +103,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ virtual UBool unsuppressBreakAfter(const 
UnicodeString& string, UErrorCode& status) = 0; -#ifndef U_HIDE_DEPRECATED_API /** * This function has been deprecated in favor of wrapIteratorWithFilter() * The behavior is identical. @@ -116,7 +113,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @see wrapBreakIteratorWithFilter() */ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; -#endif /* U_HIDE_DEPRECATED_API */ #ifndef U_HIDE_DRAFT_API /** diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index c752344f33..c84774e07f 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -353,7 +353,7 @@ public: * the default locale ID of the runtime environment. * * @param newLocale Locale to set to. If NULL, set to the value obtained - * from the runtime environement. + * from the runtime environment. * @param success The error code. * @system * @stable ICU 2.0 @@ -629,7 +629,7 @@ public: /** * Fills in "name" with the name of this locale in a format suitable for user display - * in the locale specfied by "displayLocale". This function uses getDisplayLanguage(), + * in the locale specified by "displayLocale". This function uses getDisplayLanguage(), * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display * name in the format "language (country[,variant])". For example, if displayLocale is * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's diff --git a/deps/icu-small/source/common/unicode/parseerr.h b/deps/icu-small/source/common/unicode/parseerr.h index c8283bfcc9..c05487601c 100644 --- a/deps/icu-small/source/common/unicode/parseerr.h +++ b/deps/icu-small/source/common/unicode/parseerr.h @@ -58,9 +58,9 @@ enum { U_PARSE_CONTEXT_LEN = 16 }; typedef struct UParseError { /** - * The line on which the error occured. If the parser uses this + * The line on which the error occurred. 
If the parser uses this * field, it sets it to the line number of the source text line on - * which the error appears, which will be be a value >= 1. If the + * which the error appears, which will be a value >= 1. If the * parse does not support line numbers, the value will be <= 0. * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index 12e2929d24..a3f8d32f89 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -482,9 +482,9 @@ /* Otherwise use the predefined value. */ #elif !defined(__cplusplus) # define U_CPLUSPLUS_VERSION 0 -#elif __cplusplus >= 201402L +#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) # define U_CPLUSPLUS_VERSION 14 -#elif __cplusplus >= 201103L +#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) # define U_CPLUSPLUS_VERSION 11 #else // C++98 or C++03 @@ -631,7 +631,7 @@ namespace std { */ #ifdef U_CHARSET_IS_UTF8 /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED +#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED # define U_CHARSET_IS_UTF8 1 #else # define U_CHARSET_IS_UTF8 0 @@ -749,8 +749,10 @@ namespace std { #else /* * Notes: - * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but - * does not support u"abc" string literals. + * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef + * and does not support u"abc" string literals. + * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for + * both char16_t and u"abc" string literals. * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but * does not support u"abc" string literals. 
* C++11 and C11 require support for UTF-16 literals diff --git a/deps/icu-small/source/common/unicode/putil.h b/deps/icu-small/source/common/unicode/putil.h index 91d6bb10f7..14bb99ccc5 100644 --- a/deps/icu-small/source/common/unicode/putil.h +++ b/deps/icu-small/source/common/unicode/putil.h @@ -38,7 +38,7 @@ /** * Platform utilities isolates the platform dependencies of the - * libarary. For each platform which this code is ported to, these + * library. For each platform which this code is ported to, these * functions may have to be re-implemented. */ @@ -53,7 +53,7 @@ * The data directory is determined as follows: * If u_setDataDirectory() has been called, that is it, otherwise * if the ICU_DATA environment variable is set, use that, otherwise - * If a data directory was specifed at ICU build time + * If a data directory was specified at ICU build time * <code> * \code * #define ICU_DATA_DIR "path" @@ -93,7 +93,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); #ifndef U_HIDE_INTERNAL_API /** * Return the time zone files override directory, or an empty string if - * no directory was specified. Certain time zone resources will be preferrentially + * no directory was specified. Certain time zone resources will be preferentially * loaded from individual files in this directory. * * @return the time zone data override directory. diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index c3c201dd35..0c41d69d23 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -29,7 +29,6 @@ #include "unicode/udata.h" #include "unicode/parseerr.h" #include "unicode/schriter.h" -#include "unicode/uchriter.h" U_NAMESPACE_BEGIN @@ -58,34 +57,18 @@ private: * The UText through which this BreakIterator accesses the text * @internal */ - UText *fText; - - /** - * A character iterator that refers to the same text as the UText, above. 
- * Only included for compatibility with old API, which was based on CharacterIterators. - * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. - */ - CharacterIterator *fCharIter; - - /** - * When the input text is provided by a UnicodeString, this will point to - * a characterIterator that wraps that data. Needed only for the - * implementation of getText(), a backwards compatibility issue. - */ - StringCharacterIterator *fSCharIter; - - /** - * When the input text is provided by a UText, this - * dummy CharacterIterator over an empty string will - * be returned from getText() - */ - UCharCharacterIterator *fDCharIter; + UText fText; +#ifndef U_HIDE_INTERNAL_API +public: +#endif /* U_HIDE_INTERNAL_API */ /** - * The rule data for this BreakIterator instance + * The rule data for this BreakIterator instance. + * Not for general use; Public only for testing purposes. * @internal */ RBBIDataWrapper *fData; +private: /** * The iteration state - current position, rule status for the current position, @@ -106,23 +89,10 @@ private: int32_t fRuleStatusIndex; /** - * True when iteration has run off the end, and iterator functions should return UBRK_DONE. - */ - UBool fDone; - - /** * Cache of previously determined boundary positions. */ - public: // TODO: debug, return to private. class BreakCache; BreakCache *fBreakCache; - private: - /** - * Counter for the number of characters encountered with the "dictionary" - * flag set. - * @internal - */ - uint32_t fDictionaryCharCount; /** * Cache of boundary positions within a region of text that has been @@ -150,11 +120,30 @@ private: UnhandledEngine *fUnhandledBreakEngine; /** - * - * The type of the break iterator, or -1 if it has not been set. + * Counter for the number of characters encountered with the "dictionary" + * flag set. * @internal */ - int32_t fBreakType; + uint32_t fDictionaryCharCount; + + /** + * A character iterator that refers to the same text as the UText, above. 
+ * Only included for compatibility with old API, which was based on CharacterIterators. + * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. + */ + CharacterIterator *fCharIter; + + /** + * When the input text is provided by a UnicodeString, this will point to + * a characterIterator that wraps that data. Needed only for the + * implementation of getText(), a backwards compatibility issue. + */ + StringCharacterIterator fSCharIter; + + /** + * True when iteration has run off the end, and iterator functions should return UBRK_DONE. + */ + UBool fDone; //======================================================================= // constructors @@ -206,17 +195,17 @@ public: UErrorCode &status); /** - * Contruct a RuleBasedBreakIterator from a set of precompiled binary rules. + * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules(). * Construction of a break iterator in this way is substantially faster than - * constuction from source rules. + * construction from source rules. * * Ownership of the storage containing the compiled rules remains with the * caller of this function. The compiled rules must not be modified or * deleted during the life of the break iterator. * * The compiled rules are not compatible across different major versions of ICU. - * The compiled rules are comaptible only between machines with the same + * The compiled rules are compatible only between machines with the same * byte ordering (little or big endian) and the same base character set family * (ASCII or EBCDIC). * @@ -285,7 +274,7 @@ public: * behavior, and iterating over the same text, as this one. * Differs from the copy constructor in that it is polymorphic, and * will correctly clone (copy) a derived class. - * clone() is thread safe. Multiple threads may simultaeneously + * clone() is thread safe. 
Multiple threads may simultaneously * clone the same source break iterator. * @return a newly-constructed RuleBasedBreakIterator * @stable ICU 2.0 @@ -450,7 +439,7 @@ public: virtual int32_t preceding(int32_t offset); /** - * Returns true if the specfied position is a boundary position. As a side + * Returns true if the specified position is a boundary position. As a side * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param offset the offset to check. @@ -471,8 +460,8 @@ public: /** - * Return the status tag from the break rule that determined the most recently - * returned break position. For break rules that do not specify a + * Return the status tag from the break rule that determined the boundary at + * the current iteration position. For break rules that do not specify a * status, a default value of 0 is returned. If more than one break rule * would cause a boundary to be located at some position in the text, * the numerically largest of the applicable status values is returned. @@ -489,16 +478,14 @@ public: * position from <code>next()</code>, <code>previous()</code>, or * any other break iterator functions that returns a boundary position. * <p> + * Note that <code>getRuleStatus()</code> returns the value corresponding to + * <code>current()</code> index even after <code>next()</code> has returned DONE. + * <p> * When creating custom break rules, one is free to define whatever * status values may be convenient for the application. * <p> - * Note: this function is not thread safe. It should not have been - * declared const, and the const remains only for compatibility - * reasons. (The function is logically const, but not bit-wise const). - * TODO: check this. Probably thread safe now. - * <p> - * @return the status from the break rule that determined the most recently - * returned break position. + * @return the status from the break rule that determined the boundary + * at the current iteration position. 
* * @see UWordBreak * @stable ICU 2.2 @@ -506,8 +493,8 @@ public: virtual int32_t getRuleStatus() const; /** - * Get the status (tag) values from the break rule(s) that determined the most - * recently returned break position. + * Get the status (tag) values from the break rule(s) that determined the boundary + * at the current iteration position. * <p> * The returned status value(s) are stored into an array provided by the caller. * The values are stored in sorted (ascending) order. @@ -518,10 +505,10 @@ public: * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. - * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. + * @return The number of rule status values from the rules that determined + * the boundary at the current iteration position. * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. @@ -561,7 +548,7 @@ public: * * Create a clone (copy) of this break iterator in memory provided * by the caller. The idea is to increase performance by avoiding - * a storage allocation. Use of this functoin is NOT RECOMMENDED. + * a storage allocation. Use of this function is NOT RECOMMENDED. * Performance gains are minimal, and correct buffer management is * tricky. Use clone() instead. * @@ -574,7 +561,7 @@ public: * storage for the cloned object. * * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be - * returned if the the provided buffer was too small, and + * returned if the provided buffer was too small, and * the clone was therefore put on the heap. 
* * @return Pointer to the clone object. This may differ from the stackBuffer @@ -597,7 +584,7 @@ public: * The binary data can only be used with the same version of ICU * and on the same platform type (processor endian-ness) * - * @param length Returns the length of the binary data. (Out paramter.) + * @param length Returns the length of the binary data. (Out parameter.) * * @return A pointer to the binary (compiled) rule data. The storage * belongs to the RulesBasedBreakIterator object, not the @@ -646,12 +633,6 @@ private: void reset(void); /** - * Set the type of the break iterator. - * @internal - */ - void setBreakType(int32_t type); - - /** * Common initialization function, used by constructors and bufferClone. * @internal */ @@ -697,6 +678,13 @@ private: * @internal */ void dumpCache(); + + /** + * Debugging function only. + * @internal + */ + void dumpTables(); + #endif /* U_HIDE_INTERNAL_API */ }; diff --git a/deps/icu-small/source/common/unicode/resbund.h b/deps/icu-small/source/common/unicode/resbund.h index 358ed7eeb9..ab0b60bbb2 100644 --- a/deps/icu-small/source/common/unicode/resbund.h +++ b/deps/icu-small/source/common/unicode/resbund.h @@ -132,7 +132,7 @@ public: ResourceBundle(UErrorCode &err); /** - * Standard constructor, onstructs a resource bundle for the locale-specific + * Standard constructor, constructs a resource bundle for the locale-specific * bundle in the specified package. * * @param packageName The packageName and locale together point to an ICU udata object, diff --git a/deps/icu-small/source/common/unicode/schriter.h b/deps/icu-small/source/common/unicode/schriter.h index d83a57f8d0..1a12769e8d 100644 --- a/deps/icu-small/source/common/unicode/schriter.h +++ b/deps/icu-small/source/common/unicode/schriter.h @@ -69,7 +69,7 @@ public: * Create an iterator over the UnicodeString referred to by "textStr". * The UnicodeString object is copied. 
* The iteration range begins with the code unit specified by - * "textBegin" and ends with the code unit BEFORE the code unit specfied + * "textBegin" and ends with the code unit BEFORE the code unit specified * by "textEnd". The starting position is specified by "textPos". If * "textBegin" and "textEnd" don't form a valid range on "text" (i.e., * textBegin >= textEnd or either is negative or greater than text.size()), diff --git a/deps/icu-small/source/common/unicode/ubidi.h b/deps/icu-small/source/common/unicode/ubidi.h index ef21f24206..254a5bf9ef 100644 --- a/deps/icu-small/source/common/unicode/ubidi.h +++ b/deps/icu-small/source/common/unicode/ubidi.h @@ -692,7 +692,7 @@ typedef enum UBiDiReorderingMode { * @stable ICU 3.6 */ UBIDI_REORDER_DEFAULT = 0, /** Logical to Visual algorithm which handles numbers in a way which - * mimicks the behavior of Windows XP. + * mimics the behavior of Windows XP. * @stable ICU 3.6 */ UBIDI_REORDER_NUMBERS_SPECIAL, /** Logical to Visual algorithm grouping numbers with adjacent R characters @@ -1142,7 +1142,7 @@ ubidi_setContext(UBiDi *pBiDi, /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>, + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, * version 13, * also described in The Unicode Standard, Version 4.0 .<p> * diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index 600328c49c..73c1553b24 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -268,7 +268,6 @@ ubrk_openRules(const UChar *rules, UParseError *parseErr, UErrorCode *status); -#ifndef U_HIDE_DRAFT_API /** * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. 
@@ -287,15 +286,13 @@ ubrk_openRules(const UChar *rules, * @param status Pointer to UErrorCode to receive any errors. * @return UBreakIterator for the specified rules. * @see ubrk_getBinaryRules - * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT UBreakIterator* U_EXPORT2 +U_STABLE UBreakIterator* U_EXPORT2 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar * text, int32_t textLength, UErrorCode * status); -#endif /* U_HIDE_DRAFT_API */ - /** * Thread safe cloning operation * @param bi iterator to be cloned @@ -510,7 +507,7 @@ ubrk_countAvailable(void); /** -* Returns true if the specfied position is a boundary position. As a side +* Returns true if the specified position is a boundary position. As a side * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param bi The break iterator to use. @@ -544,7 +541,7 @@ ubrk_getRuleStatus(UBreakIterator *bi); * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined * the most recent boundary returned by the break iterator. @@ -596,7 +593,6 @@ ubrk_refreshUText(UBreakIterator *bi, UErrorCode *status); -#ifndef U_HIDE_DRAFT_API /** * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator @@ -620,15 +616,13 @@ ubrk_refreshUText(UBreakIterator *bi, * otherwise 0. If not preflighting and this is larger than * rulesCapacity, *status will be set to an error. 
* @see ubrk_openBinaryRules - * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ubrk_getBinaryRules(UBreakIterator *bi, uint8_t * binaryRules, int32_t rulesCapacity, UErrorCode * status); -#endif /* U_HIDE_DRAFT_API */ - #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index 3613374d9a..4b72ecfc26 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -112,11 +112,11 @@ U_CDECL_BEGIN * Comparison: * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; * most of general categories "Z" (separators) + most whitespace ISO controls - * (including no-break spaces, but excluding IS1..IS4 and ZWSP) + * (including no-break spaces, but excluding IS1..IS4) * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) * - u_isspace: Z + whitespace ISO controls (including no-break spaces) - * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP + * - u_isblank: "horizontal spaces" = TAB + Zs */ /** @@ -2702,8 +2702,7 @@ u_isgraph(UChar32 c); * * same as * - * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) - * except Zero Width Space (ZWSP, U+200B). + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators). * * Note: There are several ICU whitespace functions; please see the uchar.h * file documentation for a detailed comparison. 
diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h index 3f73af37b8..ab0cd6da6b 100644 --- a/deps/icu-small/source/common/unicode/uclean.h +++ b/deps/icu-small/source/common/unicode/uclean.h @@ -70,7 +70,7 @@ u_init(UErrorCode *status); * This has the effect of restoring ICU to its initial condition, before * any of these override functions were installed. Refer to * u_setMemoryFunctions(), u_setMutexFunctions and - * utrace_setFunctions(). If ICU is to be reinitialized after after + * utrace_setFunctions(). If ICU is to be reinitialized after * calling u_cleanup(), these runtime override functions will need to * be set up again if they are still required. * <p> @@ -104,7 +104,7 @@ u_cleanup(void); U_CDECL_BEGIN /** * Pointer type for a user supplied memory allocation function. - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 @@ -113,7 +113,7 @@ U_CDECL_BEGIN typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); /** * Pointer type for a user supplied memory re-allocation function. - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 @@ -123,7 +123,7 @@ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t si /** * Pointer type for a user supplied memory free function. Behavior should be * similar the standard C library free(). - * @param context user supplied value, obtained from from u_setMemoryFunctions(). 
+ * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param mem Pointer to the memory block to be resized * @param size The new size for the block * @return Pointer to the resized memory block, or NULL if the resizing failed. @@ -179,8 +179,8 @@ U_CDECL_BEGIN * The user-supplied function will be called by ICU whenever ICU needs to create a * new mutex. The function implementation should create a mutex, and store a pointer * to something that uniquely identifies the mutex into the UMTX that is supplied - * as a paramter. - * @param context user supplied value, obtained from from u_setMutexFunctions(). + * as a parameter. + * @param context user supplied value, obtained from u_setMutexFunctions(). * @param mutex Receives a pointer that identifies the new mutex. * The mutex init function must set the UMTX to a non-null value. * Subsequent calls by ICU to lock, unlock, or destroy a mutex will @@ -197,7 +197,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod * Function Pointer type for a user supplied mutex functions. * One of the user-supplied functions with this signature will be called by ICU * whenever ICU needs to lock, unlock, or destroy a mutex. - * @param context user supplied value, obtained from from u_setMutexFunctions(). + * @param context user supplied value, obtained from u_setMutexFunctions(). * @param mutex specify the mutex on which to operate. * @deprecated ICU 52. This function is no longer supported. * @system @@ -229,7 +229,7 @@ u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtx /** * Pointer type for a user supplied atomic increment or decrement function. - * @param context user supplied value, obtained from from u_setAtomicIncDecFunctions(). + * @param context user supplied value, obtained from u_setAtomicIncDecFunctions(). * @param p Pointer to a 32 bit int to be incremented or decremented * @return The value of the variable after the inc or dec operation. 
* @deprecated ICU 52. This function is no longer supported. diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 05d0050f4a..53b4c6f073 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -207,7 +207,7 @@ typedef void (U_EXPORT2 *UConverterToUCallback) ( /** * Function pointer for error callback in the unicode to codepage direction. - * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). + * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason). * @param context Pointer to the callback's private data * @param args Information about the conversion in progress * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence @@ -353,7 +353,7 @@ ucnv_compareNames(const char *name1, const char *name2); * ucnv_getAlias for a complete list that is available. * If this parameter is NULL, the default converter will be used. * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred * @see ucnv_openU * @see ucnv_openCCSID * @see ucnv_getAvailableName @@ -386,7 +386,7 @@ ucnv_open(const char *converterName, UErrorCode *err); * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, * U_FILE_ACCESS_ERROR</TT> * @return the created Unicode converter object, or <TT>NULL</TT> if an - * error occured + * error occurred * @see ucnv_open * @see ucnv_openCCSID * @see ucnv_close @@ -489,7 +489,7 @@ ucnv_openCCSID(int32_t codepage, * @param packageName name of the package (equivalent to 'path' in udata_open() call) * @param converterName name of the data item to be used, without suffix. 
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred * @see udata_open * @see ucnv_open * @see ucnv_safeClone diff --git a/deps/icu-small/source/common/unicode/ucnv_err.h b/deps/icu-small/source/common/unicode/ucnv_err.h index e8a79bcd81..08c96c1440 100644 --- a/deps/icu-small/source/common/unicode/ucnv_err.h +++ b/deps/icu-small/source/common/unicode/ucnv_err.h @@ -119,19 +119,19 @@ typedef struct UConverter UConverter; #define UCNV_ESCAPE_JAVA "J" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) - * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_C "C" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_DEC "D" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_HEX "X" @@ -171,7 +171,7 @@ typedef enum { code points. The error code U_INVALID_CHAR_FOUND will be set. 
*/ UCNV_RESET = 3, /**< The callback is called with this reason when a - 'reset' has occured. Callback should reset all + 'reset' has occurred. Callback should reset all state. */ UCNV_CLOSE = 4, /**< Called when the converter is closed. The callback should release any allocated memory.*/ @@ -199,7 +199,7 @@ typedef struct { const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ } UConverterFromUnicodeArgs; @@ -215,7 +215,7 @@ typedef struct { const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ } UConverterToUnicodeArgs; diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h index ecb54d146f..adfaf0023b 100644 --- a/deps/icu-small/source/common/unicode/ucurr.h +++ b/deps/icu-small/source/common/unicode/ucurr.h @@ -103,6 +103,19 @@ typedef enum UCurrNameStyle { * @stable ICU 2.6 */ UCURR_LONG_NAME + +#ifndef U_HIDE_DRAFT_API + , + /** + * Selector for getName() indicating the narrow currency symbol. 
+ * The narrow currency symbol is similar to the regular currency + * symbol, but it always takes the shortest form: for example, + * "$" instead of "US$" for USD in en-CA. + * + * @draft ICU 61 + */ + UCURR_NARROW_SYMBOL_NAME +#endif // U_HIDE_DRAFT_API } UCurrNameStyle; #if !UCONFIG_NO_SERVICE diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h index 30de4dba0d..a9dc1631b0 100644 --- a/deps/icu-small/source/common/unicode/umachine.h +++ b/deps/icu-small/source/common/unicode/umachine.h @@ -299,6 +299,10 @@ typedef int8_t UBool; // for AIX, uchar.h needs to be included # include <uchar.h> # define U_CHAR16_IS_TYPEDEF 1 +#elif defined(_MSC_VER) && (_MSC_VER < 1900) +// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, +// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx +# define U_CHAR16_IS_TYPEDEF 1 #else # define U_CHAR16_IS_TYPEDEF 0 #endif @@ -366,7 +370,7 @@ typedef int8_t UBool; * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. * The current UChar responds to UCHAR_TYPE but OldUChar does not. 
* - * @draft ICU 59 + * @stable ICU 59 */ #if U_SIZEOF_WCHAR_T==2 typedef wchar_t OldUChar; diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index 914818a00e..c2e0ad48bd 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -1521,6 +1521,7 @@ private: UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, UErrorCode& ec); //---------------------------------------------------------------- diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index b99a686126..d0b271754b 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -2995,10 +2995,6 @@ public: */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * uint16_t * constructor. @@ -3008,16 +3004,12 @@ public: * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> * on the compiler command line or similar. * @param text NUL-terminated UTF-16 string - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : UnicodeString(ConstChar16Ptr(text)) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. @@ -3028,16 +3020,12 @@ public: * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> * on the compiler command line or similar. 
* @param text NUL-terminated UTF-16 string - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : UnicodeString(ConstChar16Ptr(text)) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. @@ -3046,7 +3034,7 @@ public: * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> * on the compiler command line or similar. * @param text nullptr - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); @@ -3060,26 +3048,18 @@ public: UnicodeString(const char16_t *text, int32_t textLength); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * uint16_t * constructor. * Delegates to UnicodeString(const char16_t *, int32_t). * @param text UTF-16 string * @param length string length - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(const uint16_t *text, int32_t length) : UnicodeString(ConstChar16Ptr(text), length) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. @@ -3087,22 +3067,18 @@ public: * Delegates to UnicodeString(const char16_t *, int32_t). 
* @param text NUL-terminated UTF-16 string * @param length string length - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(const wchar_t *text, int32_t length) : UnicodeString(ConstChar16Ptr(text), length) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. * @param text nullptr * @param length ignored - * @draft ICU 59 + * @stable ICU 59 */ inline UnicodeString(const std::nullptr_t text, int32_t length); @@ -3152,10 +3128,6 @@ public: */ UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * Writable-aliasing uint16_t * constructor. @@ -3163,16 +3135,12 @@ public: * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * Writable-aliasing wchar_t * constructor. 
@@ -3181,23 +3149,19 @@ public: * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * Writable-aliasing nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. * @param buffer nullptr * @param buffLength ignored * @param buffCapacity ignored - * @draft ICU 59 + * @stable ICU 59 */ inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 982655c442..d8ab85091f 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -107,7 +107,6 @@ #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) #define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) -#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) #define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) #define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) @@ -446,7 +445,6 @@ #define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) #define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) #define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) -#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton) #define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) #define 
ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) #define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) @@ -551,6 +549,7 @@ #define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) #define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) #define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) #define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) #define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) #define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) @@ -862,6 +861,7 @@ #define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) #define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) #define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) +#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) #define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) #define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) #define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) @@ -1326,7 +1326,6 @@ #define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) #define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) #define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) -#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties) #define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) #define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) #define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) diff --git a/deps/icu-small/source/common/unicode/ures.h b/deps/icu-small/source/common/unicode/ures.h index 918b9f208e..af0ce76f25 100644 --- a/deps/icu-small/source/common/unicode/ures.h +++ 
b/deps/icu-small/source/common/unicode/ures.h @@ -16,7 +16,7 @@ * 04/04/99 helena Fixed internal header inclusion. * 04/15/99 Madhu Updated Javadoc * 06/14/99 stephen Removed functions taking a filename suffix. -* 07/20/99 stephen Language-independent ypedef to void* +* 07/20/99 stephen Language-independent typedef to void* * 11/09/99 weiv Added ures_getLocale() * 06/24/02 weiv Added support for resource sharing ****************************************************************************** @@ -138,7 +138,7 @@ typedef enum { /** * Opens a UResourceBundle, from which users can extract strings by using * their corresponding keys. - * Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully + * Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully * opened resource bundle. * @param packageName The packageName and locale together point to an ICU udata object, * as defined by <code> udata_open( packageName, "res", locale, err) </code> @@ -301,7 +301,7 @@ ures_getVersion(const UResourceBundle* resB, * you to query for the real locale of the resource. For example, if you requested * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. * For subresources, the locale where this resource comes from will be returned. - * If fallback has occured, getLocale will reflect this. + * If fallback has occurred, getLocale will reflect this. * * @param resourceBundle resource bundle in question * @param status just for catching illegal arguments @@ -580,7 +580,7 @@ ures_hasNext(const UResourceBundle *resourceBundle); * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. * Alternatively, you can supply a struct to be filled by this function. * @param status fills in the outgoing error code. You may still get a non NULL result even if an - * error occured. Check status instead. + * error occurred. Check status instead. 
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it * @stable ICU 2.0 */ @@ -596,7 +596,7 @@ ures_getNextResource(UResourceBundle *resourceBundle, * @param resourceBundle a resource * @param len fill in length of the string * @param key fill in for key associated with this string. NULL if no key - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. * @stable ICU 2.0 @@ -615,7 +615,7 @@ ures_getNextString(UResourceBundle *resourceBundle, * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. * Alternatively, you can supply a struct to be filled by this function. * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has - * occured. Check status instead. + * occurred. Check status instead. * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it * @stable ICU 2.0 */ @@ -631,7 +631,7 @@ ures_getByIndex(const UResourceBundle *resourceBundle, * @param resourceBundle a resource * @param indexS an index to the wanted string. * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. 
* @stable ICU 2.0 @@ -722,7 +722,7 @@ ures_getByKey(const UResourceBundle *resourceBundle, * @param resB a resource * @param key a key associated with the wanted string * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. * @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h index 3ec235d50c..0befa1cd42 100644 --- a/deps/icu-small/source/common/unicode/uscript.h +++ b/deps/icu-small/source/common/unicode/uscript.h @@ -476,7 +476,7 @@ typedef enum UScriptCode { * @param nameOrAbbrOrLocale name of the script, as given in * PropertyValueAliases.txt, or ISO 15924 code or locale * @param fillIn the UScriptCode buffer to fill in the script code - * @param capacity the capacity (size) fo UScriptCode buffer passed in. + * @param capacity the capacity (size) of UScriptCode buffer passed in. * @param err the error status code. * @return The number of script codes filled in the buffer passed in * @stable ICU 2.4 diff --git a/deps/icu-small/source/common/unicode/ushape.h b/deps/icu-small/source/common/unicode/ushape.h index 5af8ffe1c5..3064e08572 100644 --- a/deps/icu-small/source/common/unicode/ushape.h +++ b/deps/icu-small/source/common/unicode/ushape.h @@ -93,7 +93,7 @@ * which must not indicate a failure before the function call. * * @return The number of UChars written to the destination buffer. - * If an error occured, then no output was written, or it may be + * If an error occurred, then no output was written, or it may be * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then * the return value indicates the necessary destination buffer size. 
* @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/usprep.h b/deps/icu-small/source/common/unicode/usprep.h index 33ca1461ce..7cdc6cdd18 100644 --- a/deps/icu-small/source/common/unicode/usprep.h +++ b/deps/icu-small/source/common/unicode/usprep.h @@ -33,14 +33,14 @@ * StringPrep prepares Unicode strings for use in network protocols. * Profiles of StingPrep are set of rules and data according to with the * Unicode Strings are prepared. Each profiles contains tables which describe - * how a code point should be treated. The tables are broadly classied into + * how a code point should be treated. The tables are broadly classified into * <ul> - * <li> Unassinged Table: Contains code points that are unassigned + * <li> Unassigned Table: Contains code points that are unassigned * in the Unicode Version supported by StringPrep. Currently * RFC 3454 supports Unicode 3.2. </li> - * <li> Prohibited Table: Contains code points that are prohibted from + * <li> Prohibited Table: Contains code points that are prohibited from * the output of the StringPrep processing function. </li> - * <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li> + * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li> * </ul> * * The procedure for preparing Unicode strings: @@ -230,7 +230,7 @@ U_NAMESPACE_END /** * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC), - * checks for prohited and BiDi characters in the order defined by RFC 3454 + * checks for prohibited and BiDi characters in the order defined by RFC 3454 * depending on the options specified in the profile. 
* * @param prep The profile to use diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h index 1ea27126cc..cf6ec0b6b4 100644 --- a/deps/icu-small/source/common/unicode/ustring.h +++ b/deps/icu-small/source/common/unicode/ustring.h @@ -403,7 +403,7 @@ u_strspn(const UChar *string, const UChar *matchSet); * @param saveState The current pointer within the original string, * which is set by this function. The saveState * parameter should the address of a local variable of type - * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use + * UChar *. (i.e. defined "UChar *myLocalSaveState" and use * &myLocalSaveState for this parameter). * @return A pointer to the next token found in src, or NULL * when there are no more tokens. @@ -884,7 +884,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count); * Unicode String literals in C. * We need one macro to declare a variable for the string * and to statically preinitialize it if possible, - * and a second macro to dynamically intialize such a string variable if necessary. + * and a second macro to dynamically initialize such a string variable if necessary. * * The macros are defined for maximum performance. * They work only for strings that contain "invariant characters", i.e., diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h index 7eea1da240..51d11a2e00 100644 --- a/deps/icu-small/source/common/unicode/utext.h +++ b/deps/icu-small/source/common/unicode/utext.h @@ -655,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut); * @param ut the UText from which to extract data. * @param nativeStart the native index of the first character to extract.\ * If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength + * it will be pinned to be within 0 <= index <= textLength * @param nativeLimit the native string index of the position following the last * character to extract. 
If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength. + * it will be pinned to be within 0 <= index <= textLength. * nativeLimit must be >= nativeStart. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed * @param destCapacity The size, in UChars, of the destination buffer. May be zero @@ -906,7 +906,7 @@ utext_copy(UText *ut, * Caution: freezing a UText will disable changes made via the specific * frozen UText wrapper only; it will not have any effect on the ability to * directly modify the text by bypassing the UText. Any such backdoor modifications - * are always an error while UText access is occuring because the underlying + * are always an error while UText access is occurring because the underlying * text can get out of sync with UText's buffering. * </p> * @@ -1452,7 +1452,7 @@ struct UText { void *pExtra; /** - * (protected) Pointer to string or text-containin object or similar. + * (protected) Pointer to string or text-containing object or similar. * This is the source of the text that this UText is wrapping, in a format * that is known to the text provider functions. 
* @stable ICU 3.4 diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h index 59b4b25570..1f07634359 100644 --- a/deps/icu-small/source/common/unicode/utf8.h +++ b/deps/icu-small/source/common/unicode/utf8.h @@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT_UNSAFE * @stable ICU 2.4 */ -#define U8_NEXT(s, i, length, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ - } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ - } \ - } \ -} +#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) /** * Get a code point from a string at a code point boundary offset, @@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT * @stable ICU 51 */ -#define U8_NEXT_OR_FFFD(s, i, length, c) { \ +#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) + +/** @internal */ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \ (c)=(uint8_t)(s)[(i)++]; \ if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && 
(c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ + uint8_t __t = 0; \ + if((i)!=(length) && \ + /* fetch/validate/assemble all but last trail byte */ \ + ((c)>=0xe0 ? \ + ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \ + (c)=(sub); /* ill-formed*/ \ } \ } \ } @@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } \ } @@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, 
int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND(s, i, capacity, c, isError) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } else { \ - (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \ + (isError)=TRUE; \ } \ } @@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * If the offset points to a UTF-8 trail byte, * then the offset is moved backward to the corresponding lead byte. * Otherwise, it is not modified. + * * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. 
* * @param s const uint8_t * string * @param start int32_t starting string offset (usually 0) * @param i int32_t string offset, must be start<=i * @see U8_SET_CP_START_UNSAFE + * @see U8_TRUNCATE_IF_INCOMPLETE * @stable ICU 2.4 */ #define U8_SET_CP_START(s, start, i) { \ @@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } \ } +#ifndef U_HIDE_DRAFT_API +/** + * If the string ends with a UTF-8 byte sequence that is valid so far + * but incomplete, then reduce the length of the string to end before + * the lead byte of that incomplete sequence. + * For example, if the string ends with E1 80, the length is reduced by 2. + * + * In all other cases (the string ends with a complete sequence, or it is not + * possible for any further trail byte to extend the trailing sequence) + * the length remains unchanged. + * + * Useful for processing text split across multiple buffers + * (save the incomplete sequence for later) + * and for optimizing iteration + * (check for string length only once per character). + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_SET_CP_START(), this macro never reads s[length]. + * + * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param length int32_t string length (usually start<=length) + * @see U8_SET_CP_START + * @draft ICU 61 + */ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \ + if((length)>(start)) { \ + uint8_t __b1=s[(length)-1]; \ + if(U8_IS_SINGLE(__b1)) { \ + /* common ASCII character */ \ + } else if(U8_IS_LEAD(__b1)) { \ + --(length); \ + } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ + uint8_t __b2=s[(length)-2]; \ + if(0xe0<=__b2 && __b2<=0xf4) { \ + if(__b2<0xf0 ? 
U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ + U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ + (length)-=2; \ + } \ + } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ + uint8_t __b3=s[(length)-3]; \ + if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ + (length)-=3; \ + } \ + } \ + } \ + } +#endif // U_HIDE_DRAFT_API + /* definitions with backward iteration -------------------------------------- */ /** diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h index 5d561109c7..bf6fd036f0 100644 --- a/deps/icu-small/source/common/unicode/utrace.h +++ b/deps/icu-small/source/common/unicode/utrace.h @@ -183,7 +183,7 @@ UTraceData(const void *context, int32_t fnNumber, int32_t level, * tracing functions must themselves filter by checking that the * current thread is the desired thread. * - * @param context an uninterpretted pointer. Whatever is passed in + * @param context an uninterpreted pointer. Whatever is passed in * here will in turn be passed to each of the tracing * functions UTraceEntry, UTraceExit and UTraceData. * ICU does not use or alter this pointer. @@ -320,7 +320,7 @@ utrace_getFunctions(const void **context, * human readable form. Note that a UTraceData function may choose * to not format the data; it could, for example, save it in * in the raw form it was received (more compact), leaving - * formatting for a later trace analyis tool. + * formatting for a later trace analysis tool. * @param outBuf pointer to a buffer to receive the formatted output. Output * will be nul terminated if there is space in the buffer - * if the length of the requested output < the output buffer size. 
diff --git a/deps/icu-small/source/common/unicode/utypes.h b/deps/icu-small/source/common/unicode/utypes.h index 4c40e6a87c..b6cf496511 100644 --- a/deps/icu-small/source/common/unicode/utypes.h +++ b/deps/icu-small/source/common/unicode/utypes.h @@ -145,7 +145,7 @@ /** * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. * Defined as a literal, not a string. - * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string + * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string * from the corresponding macro invocation, _before_ other macro substitutions. * Need a nested \#defines to get the actual version numbers rather than * the literal text U_ICU_VERSION_MAJOR_NUM into the name. @@ -446,14 +446,14 @@ typedef enum UErrorCode { U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ - U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. 
- It is very possible that a circular alias definition has occured */ + It is very possible that a circular alias definition has occurred */ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ @@ -499,7 +499,7 @@ typedef enum UErrorCode { U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ @@ -539,12 +539,15 @@ typedef enum UErrorCode { U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ +#ifndef U_HIDE_DRAFT_API + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT, + U_FMT_PARSE_ERROR_LIMIT = 0x10113, #endif // U_HIDE_DEPRECATED_API /* @@ -555,7 +558,7 @@ typedef enum UErrorCode { U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. 
*/ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ - U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ @@ -564,7 +567,7 @@ typedef enum UErrorCode { U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ - U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal BreakIterator error code. diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index d905a0f50d..0427bcb03d 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -58,13 +58,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 60 +#define U_ICU_VERSION_MAJOR_NUM 61 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 2 +#define U_ICU_VERSION_MINOR_NUM 1 /** The current ICU patchlevel version as an integer. 
* This value will change in the subsequent releases of ICU @@ -84,7 +84,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _60 +#define U_ICU_VERSION_SUFFIX _61 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -119,19 +119,26 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "60.2" +#define U_ICU_VERSION "61.1" -/** The current ICU library major/minor version as a string without dots, for library name suffixes. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 +/** + * The current ICU library major version number as a string, for library name suffixes. + * This value will change in subsequent releases of ICU. + * + * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers + * into one string without dots ("48"). + * Since ICU 49, it is the double-digit major ICU version number. + * See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU + * + * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "60" +#define U_ICU_VERSION_SHORT "61" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "60.2" +#define U_ICU_DATA_VERSION "61.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/unicode/uversion.h b/deps/icu-small/source/common/unicode/uversion.h index cda24b6e0f..3f0251d399 100644 --- a/deps/icu-small/source/common/unicode/uversion.h +++ b/deps/icu-small/source/common/unicode/uversion.h @@ -105,7 +105,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; * @stable ICU 2.4 */ -/* Define namespace symbols if the compiler supports it. */ +/* Define C++ namespace symbols. 
*/ #ifdef __cplusplus # if U_DISABLE_RENAMING # define U_ICU_NAMESPACE icu @@ -122,7 +122,13 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; # define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: # ifndef U_USING_ICU_NAMESPACE -# define U_USING_ICU_NAMESPACE 1 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ + defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_USING_ICU_NAMESPACE 0 +# else +# define U_USING_ICU_NAMESPACE 0 +# endif # endif # if U_USING_ICU_NAMESPACE U_NAMESPACE_USE diff --git a/deps/icu-small/source/common/unifiedcache.cpp b/deps/icu-small/source/common/unifiedcache.cpp index fd0be593d7..f0f660ed06 100644 --- a/deps/icu-small/source/common/unifiedcache.cpp +++ b/deps/icu-small/source/common/unifiedcache.cpp @@ -6,24 +6,26 @@ * others. All Rights Reserved. ****************************************************************************** * -* File UNIFIEDCACHE.CPP +* File unifiedcache.cpp ****************************************************************************** */ -#include "uhash.h" #include "unifiedcache.h" -#include "umutex.h" + +#include <algorithm> // For std::max() + #include "mutex.h" #include "uassert.h" +#include "uhash.h" #include "ucln_cmn.h" +#include "umutex.h" static icu::UnifiedCache *gCache = NULL; -static icu::SharedObject *gNoValue = NULL; static UMutex gCacheMutex = U_MUTEX_INITIALIZER; static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER; static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; -static const int32_t MAX_EVICT_ITERATIONS = 10; +static const int32_t MAX_EVICT_ITERATIONS = 10; static const int32_t DEFAULT_MAX_UNUSED = 1000; static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; @@ -35,10 +37,6 @@ static UBool U_CALLCONV unifiedcache_cleanup() { delete gCache; gCache = NULL; } - if (gNoValue) { - delete gNoValue; - gNoValue = NULL; - } return TRUE; } 
U_CDECL_END @@ -73,23 +71,15 @@ static void U_CALLCONV cacheInit(UErrorCode &status) { ucln_common_registerCleanup( UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup); - // gNoValue must be created first to avoid assertion error in - // cache constructor. - gNoValue = new SharedObject(); gCache = new UnifiedCache(status); if (gCache == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(status)) { delete gCache; - delete gNoValue; gCache = NULL; - gNoValue = NULL; return; } - // We add a softref because we want hash elements with gNoValue to be - // elligible for purging but we don't ever want gNoValue to be deleted. - gNoValue->addSoftRef(); } UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { @@ -104,14 +94,24 @@ UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { UnifiedCache::UnifiedCache(UErrorCode &status) : fHashtable(NULL), fEvictPos(UHASH_FIRST), - fItemsInUseCount(0), + fNumValuesTotal(0), + fNumValuesInUse(0), fMaxUnused(DEFAULT_MAX_UNUSED), fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE), - fAutoEvictedCount(0) { + fAutoEvictedCount(0), + fNoValue(nullptr) { if (U_FAILURE(status)) { return; } - U_ASSERT(gNoValue != NULL); + fNoValue = new SharedObject(); + if (fNoValue == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted + fNoValue->hardRefCount = 1; // when other references to it are removed. 
+ fNoValue->cachePtr = this; + fHashtable = uhash_open( &ucache_hashKeys, &ucache_compareKeys, @@ -139,7 +139,7 @@ void UnifiedCache::setEvictionPolicy( int32_t UnifiedCache::unusedCount() const { Mutex lock(&gCacheMutex); - return uhash_count(fHashtable) - fItemsInUseCount; + return uhash_count(fHashtable) - fNumValuesInUse; } int64_t UnifiedCache::autoEvictedCount() const { @@ -161,6 +161,12 @@ void UnifiedCache::flush() const { while (_flush(FALSE)); } +void UnifiedCache::handleUnreferencedObject() const { + Mutex lock(&gCacheMutex); + --fNumValuesInUse; + _runEvictionSlice(); +} + #ifdef UNIFIED_CACHE_DEBUG #include <stdio.h> @@ -199,7 +205,7 @@ void UnifiedCache::_dumpContents() const { "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n", key->writeDescription(buffer, 256), key->creationStatus, - sharedObject == gNoValue ? NULL :sharedObject, + sharedObject == fNoValue ? NULL :sharedObject, sharedObject->getRefCount(), sharedObject->getSoftRefCount()); } @@ -219,10 +225,11 @@ UnifiedCache::~UnifiedCache() { _flush(TRUE); } uhash_close(fHashtable); + fHashtable = nullptr; + delete fNoValue; + fNoValue = nullptr; } -// Returns the next element in the cache round robin style. -// On entry, gCacheMutex must be held. const UHashElement * UnifiedCache::_nextElement() const { const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos); @@ -233,46 +240,36 @@ UnifiedCache::_nextElement() const { return element; } -// Flushes the contents of the cache. If cache values hold references to other -// cache values then _flush should be called in a loop until it returns FALSE. -// On entry, gCacheMutex must be held. -// On exit, those values with are evictable are flushed. If all is true -// then every value is flushed even if it is not evictable. -// Returns TRUE if any value in cache was flushed or FALSE otherwise. 
UBool UnifiedCache::_flush(UBool all) const { UBool result = FALSE; int32_t origSize = uhash_count(fHashtable); for (int32_t i = 0; i < origSize; ++i) { const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } if (all || _isEvictable(element)) { const SharedObject *sharedObject = (const SharedObject *) element->value.pointer; + U_ASSERT(sharedObject->cachePtr = this); uhash_removeElement(fHashtable, element); - sharedObject->removeSoftRef(); + removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero. result = TRUE; } } return result; } -// Computes how many items should be evicted. -// On entry, gCacheMutex must be held. -// Returns number of items that should be evicted or a value <= 0 if no -// items need to be evicted. int32_t UnifiedCache::_computeCountOfItemsToEvict() const { - int32_t maxPercentageOfInUseCount = - fItemsInUseCount * fMaxPercentageOfInUse / 100; - int32_t maxUnusedCount = fMaxUnused; - if (maxUnusedCount < maxPercentageOfInUseCount) { - maxUnusedCount = maxPercentageOfInUseCount; - } - return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount; + int32_t totalItems = uhash_count(fHashtable); + int32_t evictableItems = totalItems - fNumValuesInUse; + + int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100; + int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused); + int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit); + return countOfItemsToEvict; } -// Run an eviction slice. -// On entry, gCacheMutex must be held. -// _runEvictionSlice runs a slice of the evict pipeline by examining the next -// 10 entries in the cache round robin style evicting them if they are eligible. 
void UnifiedCache::_runEvictionSlice() const { int32_t maxItemsToEvict = _computeCountOfItemsToEvict(); if (maxItemsToEvict <= 0) { @@ -280,11 +277,14 @@ void UnifiedCache::_runEvictionSlice() const { } for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) { const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } if (_isEvictable(element)) { const SharedObject *sharedObject = (const SharedObject *) element->value.pointer; uhash_removeElement(fHashtable, element); - sharedObject->removeSoftRef(); + removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero. ++fAutoEvictedCount; if (--maxItemsToEvict == 0) { break; @@ -293,11 +293,6 @@ void UnifiedCache::_runEvictionSlice() const { } } - -// Places a new value and creationStatus in the cache for the given key. -// On entry, gCacheMutex must be held. key must not exist in the cache. -// On exit, value and creation status placed under key. Soft reference added -// to value on successful add. On error sets status. void UnifiedCache::_putNew( const CacheKeyBase &key, const SharedObject *value, @@ -312,24 +307,17 @@ void UnifiedCache::_putNew( return; } keyToAdopt->fCreationStatus = creationStatus; - if (value->noSoftReferences()) { + if (value->softRefCount == 0) { _registerMaster(keyToAdopt, value); } - uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + U_ASSERT(oldValue == nullptr); + (void)oldValue; if (U_SUCCESS(status)) { - value->addSoftRef(); + value->softRefCount++; } } -// Places value and status at key if there is no value at key or if cache -// entry for key is in progress. Otherwise, it leaves the current value and -// status there. -// On entry. gCacheMutex must not be held. value must be -// included in the reference count of the object to which it points. 
-// On exit, value and status are changed to what was already in the cache if -// something was there and not in progress. Otherwise, value and status are left -// unchanged in which case they are placed in the cache on a best-effort basis. -// Caller must call removeRef() on value. void UnifiedCache::_putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, @@ -352,15 +340,7 @@ void UnifiedCache::_putIfAbsentAndGet( _runEvictionSlice(); } -// Attempts to fetch value and status for key from cache. -// On entry, gCacheMutex must not be held value must be NULL and status must -// be U_ZERO_ERROR. -// On exit, either returns FALSE (In this -// case caller should try to create the object) or returns TRUE with value -// pointing to the fetched value and status set to fetched status. When -// FALSE is returned status may be set to failure if an in progress hash -// entry could not be made but value will remain unchanged. When TRUE is -// returned, caler must call removeRef() on value. + UBool UnifiedCache::_poll( const CacheKeyBase &key, const SharedObject *&value, @@ -369,27 +349,29 @@ UBool UnifiedCache::_poll( U_ASSERT(status == U_ZERO_ERROR); Mutex lock(&gCacheMutex); const UHashElement *element = uhash_find(fHashtable, &key); - while (element != NULL && _inProgress(element)) { + + // If the hash table contains an inProgress placeholder entry for this key, + // this means that another thread is currently constructing the value object. + // Loop, waiting for that construction to complete. + while (element != NULL && _inProgress(element)) { umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex); element = uhash_find(fHashtable, &key); } + + // If the hash table contains an entry for the key, + // fetch out the contents and return them. if (element != NULL) { - _fetch(element, value, status); + _fetch(element, value, status); return TRUE; } - _putNew(key, gNoValue, U_ZERO_ERROR, status); + + // The hash table contained nothing for this key. 
+ // Insert an inProgress place holder value. + // Our caller will create the final value and update the hash table. + _putNew(key, fNoValue, U_ZERO_ERROR, status); return FALSE; } -// Gets value out of cache. -// On entry. gCacheMutex must not be held. value must be NULL. status -// must be U_ZERO_ERROR. -// On exit. value and status set to what is in cache at key or on cache -// miss the key's createObject() is called and value and status are set to -// the result of that. In this latter case, best effort is made to add the -// value and status to the cache. If createObject() fails to create a value, -// gNoValue is stored in cache, and value is set to NULL. Caller must call -// removeRef on value if non NULL. void UnifiedCache::_get( const CacheKeyBase &key, const SharedObject *&value, @@ -398,7 +380,7 @@ void UnifiedCache::_get( U_ASSERT(value == NULL); U_ASSERT(status == U_ZERO_ERROR); if (_poll(key, value, status)) { - if (value == gNoValue) { + if (value == fNoValue) { SharedObject::clearPtr(value); } return; @@ -410,46 +392,22 @@ void UnifiedCache::_get( U_ASSERT(value == NULL || value->hasHardReferences()); U_ASSERT(value != NULL || status != U_ZERO_ERROR); if (value == NULL) { - SharedObject::copyPtr(gNoValue, value); + SharedObject::copyPtr(fNoValue, value); } _putIfAbsentAndGet(key, value, status); - if (value == gNoValue) { + if (value == fNoValue) { SharedObject::clearPtr(value); } } -void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const { - Mutex mutex(&gCacheMutex); - decrementItemsInUse(); - _runEvictionSlice(); -} - -void UnifiedCache::incrementItemsInUse() const { - ++fItemsInUseCount; -} - -void UnifiedCache::decrementItemsInUse() const { - --fItemsInUseCount; +void UnifiedCache::_registerMaster( + const CacheKeyBase *theKey, const SharedObject *value) const { + theKey->fIsMaster = true; + value->cachePtr = this; + ++fNumValuesTotal; + ++fNumValuesInUse; } -// Register a master cache entry. -// On entry, gCacheMutex must be held. 
-// On exit, items in use count incremented, entry is marked as a master -// entry, and value registered with cache so that subsequent calls to -// addRef() and removeRef() on it correctly updates items in use count -void UnifiedCache::_registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const { - theKey->fIsMaster = TRUE; - ++fItemsInUseCount; - value->registerWithCache(this); -} - -// Store a value and error in given hash entry. -// On entry, gCacheMutex must be held. Hash entry element must be in progress. -// value must be non NULL. -// On Exit, soft reference added to value. value and status stored in hash -// entry. Soft reference removed from previous stored value. Waiting -// threads notified. void UnifiedCache::_put( const UHashElement *element, const SharedObject *value, @@ -458,86 +416,52 @@ void UnifiedCache::_put( const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *oldValue = (const SharedObject *) element->value.pointer; theKey->fCreationStatus = status; - if (value->noSoftReferences()) { + if (value->softRefCount == 0) { _registerMaster(theKey, value); } - value->addSoftRef(); + value->softRefCount++; UHashElement *ptr = const_cast<UHashElement *>(element); ptr->value.pointer = (void *) value; - oldValue->removeSoftRef(); + U_ASSERT(oldValue == fNoValue); + removeSoftRef(oldValue); // Tell waiting threads that we replace in-progress status with // an error. umtx_condBroadcast(&gInProgressValueAddedCond); } -void -UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) { - if(src != dest) { - if(dest != NULL) { - dest->removeRefWhileHoldingCacheLock(); - } - dest = src; - if(src != NULL) { - src->addRefWhileHoldingCacheLock(); - } - } -} - -void -UnifiedCache::clearPtr(const SharedObject *&ptr) { - if (ptr != NULL) { - ptr->removeRefWhileHoldingCacheLock(); - ptr = NULL; - } -} - - -// Fetch value and error code from a particular hash entry. 
-// On entry, gCacheMutex must be held. value must be either NULL or must be -// included in the ref count of the object to which it points. -// On exit, value and status set to what is in the hash entry. Caller must -// eventually call removeRef on value. -// If hash entry is in progress, value will be set to gNoValue and status will -// be set to U_ZERO_ERROR. void UnifiedCache::_fetch( const UHashElement *element, const SharedObject *&value, - UErrorCode &status) { + UErrorCode &status) const { const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; status = theKey->fCreationStatus; - // Since we have the cache lock, calling regular SharedObject methods + // Since we have the cache lock, calling regular SharedObject add/removeRef // could cause us to deadlock on ourselves since they may need to lock // the cache mutex. - UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value); + removeHardRef(value); + value = static_cast<const SharedObject *>(element->value.pointer); + addHardRef(value); } -// Determine if given hash entry is in progress. -// On entry, gCacheMutex must be held. -UBool UnifiedCache::_inProgress(const UHashElement *element) { - const SharedObject *value = NULL; + +UBool UnifiedCache::_inProgress(const UHashElement* element) const { UErrorCode status = U_ZERO_ERROR; + const SharedObject * value = NULL; _fetch(element, value, status); UBool result = _inProgress(value, status); - - // Since we have the cache lock, calling regular SharedObject methods - // could cause us to deadlock on ourselves since they may need to lock - // the cache mutex. - UnifiedCache::clearPtr(value); + removeHardRef(value); return result; } -// Determine if given hash entry is in progress. -// On entry, gCacheMutex must be held. 
UBool UnifiedCache::_inProgress( - const SharedObject *theValue, UErrorCode creationStatus) { - return (theValue == gNoValue && creationStatus == U_ZERO_ERROR); + const SharedObject* theValue, UErrorCode creationStatus) const { + return (theValue == fNoValue && creationStatus == U_ZERO_ERROR); } -// Determine if given hash entry is eligible for eviction. -// On entry, gCacheMutex must be held. -UBool UnifiedCache::_isEvictable(const UHashElement *element) { +UBool UnifiedCache::_isEvictable(const UHashElement *element) const +{ const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *theValue = (const SharedObject *) element->value.pointer; @@ -549,7 +473,47 @@ UBool UnifiedCache::_isEvictable(const UHashElement *element) { // We can evict entries that are either not a master or have just // one reference (The one reference being from the cache itself). - return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences())); + return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences())); +} + +void UnifiedCache::removeSoftRef(const SharedObject *value) const { + U_ASSERT(value->cachePtr == this); + U_ASSERT(value->softRefCount > 0); + if (--value->softRefCount == 0) { + --fNumValuesTotal; + if (value->noHardReferences()) { + delete value; + } else { + // This path only happens from flush(all). Which only happens from the + // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior + // of value.removeRef(), causing the deletion to be done there. 
+ value->cachePtr = nullptr; + } + } +} + +int32_t UnifiedCache::removeHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_dec(&value->hardRefCount); + U_ASSERT(refCount >= 0); + if (refCount == 0) { + --fNumValuesInUse; + } + } + return refCount; +} + +int32_t UnifiedCache::addHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_inc(&value->hardRefCount); + U_ASSERT(refCount >= 1); + if (refCount == 1) { + fNumValuesInUse++; + } + } + return refCount; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unifiedcache.h b/deps/icu-small/source/common/unifiedcache.h index 947ebbdc78..b3ccd60d17 100644 --- a/deps/icu-small/source/common/unifiedcache.h +++ b/deps/icu-small/source/common/unifiedcache.h @@ -190,7 +190,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { UnifiedCache(UErrorCode &status); /** - * Returns the cache instance. + * Return a pointer to the global cache instance. */ static UnifiedCache *getInstance(UErrorCode &status); @@ -294,7 +294,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { /** * Configures at what point evcition of unused entries will begin. - * Eviction is triggered whenever the number of unused entries exeeds + * Eviction is triggered whenever the number of evictable keys exeeds * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100). * Once the number of unused entries drops below one of these, * eviction ceases. 
Because eviction happens incrementally, @@ -341,60 +341,214 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { */ int32_t unusedCount() const; - virtual void incrementItemsInUse() const; - virtual void decrementItemsInUseWithLockingAndEviction() const; - virtual void decrementItemsInUse() const; + virtual void handleUnreferencedObject() const; virtual ~UnifiedCache(); + private: UHashtable *fHashtable; mutable int32_t fEvictPos; - mutable int32_t fItemsInUseCount; + mutable int32_t fNumValuesTotal; + mutable int32_t fNumValuesInUse; int32_t fMaxUnused; int32_t fMaxPercentageOfInUse; mutable int64_t fAutoEvictedCount; + SharedObject *fNoValue; + UnifiedCache(const UnifiedCache &other); UnifiedCache &operator=(const UnifiedCache &other); + + /** + * Flushes the contents of the cache. If cache values hold references to other + * cache values then _flush should be called in a loop until it returns FALSE. + * + * On entry, gCacheMutex must be held. + * On exit, those values with are evictable are flushed. + * + * @param all if false flush evictable items only, which are those with no external + * references, plus those that can be safely recreated.<br> + * if true, flush all elements. Any values (sharedObjects) with remaining + * hard (external) references are not deleted, but are detached from + * the cache, so that a subsequent removeRefs can delete them. + * _flush is not thread safe when all is true. + * @return TRUE if any value in cache was flushed or FALSE otherwise. + */ UBool _flush(UBool all) const; + + /** + * Gets value out of cache. + * On entry. gCacheMutex must not be held. value must be NULL. status + * must be U_ZERO_ERROR. + * On exit. value and status set to what is in cache at key or on cache + * miss the key's createObject() is called and value and status are set to + * the result of that. In this latter case, best effort is made to add the + * value and status to the cache. 
If createObject() fails to create a value, + * fNoValue is stored in cache, and value is set to NULL. Caller must call + * removeRef on value if non NULL. + */ void _get( const CacheKeyBase &key, const SharedObject *&value, const void *creationContext, UErrorCode &status) const; - UBool _poll( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const; - void _putNew( - const CacheKeyBase &key, - const SharedObject *value, - const UErrorCode creationStatus, - UErrorCode &status) const; + + /** + * Attempts to fetch value and status for key from cache. + * On entry, gCacheMutex must not be held value must be NULL and status must + * be U_ZERO_ERROR. + * On exit, either returns FALSE (In this + * case caller should try to create the object) or returns TRUE with value + * pointing to the fetched value and status set to fetched status. When + * FALSE is returned status may be set to failure if an in progress hash + * entry could not be made but value will remain unchanged. When TRUE is + * returned, caller must call removeRef() on value. + */ + UBool _poll( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const; + + /** + * Places a new value and creationStatus in the cache for the given key. + * On entry, gCacheMutex must be held. key must not exist in the cache. + * On exit, value and creation status placed under key. Soft reference added + * to value on successful add. On error sets status. + */ + void _putNew( + const CacheKeyBase &key, + const SharedObject *value, + const UErrorCode creationStatus, + UErrorCode &status) const; + + /** + * Places value and status at key if there is no value at key or if cache + * entry for key is in progress. Otherwise, it leaves the current value and + * status there. + * + * On entry. gCacheMutex must not be held. Value must be + * included in the reference count of the object to which it points. 
+ * + * On exit, value and status are changed to what was already in the cache if + * something was there and not in progress. Otherwise, value and status are left + * unchanged in which case they are placed in the cache on a best-effort basis. + * Caller must call removeRef() on value. + */ void _putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, UErrorCode &status) const; - const UHashElement *_nextElement() const; + + /** + * Returns the next element in the cache round robin style. + * Returns nullptr if the cache is empty. + * On entry, gCacheMutex must be held. + */ + const UHashElement *_nextElement() const; + + /** + * Return the number of cache items that would need to be evicted + * to bring usage into conformance with eviction policy. + * + * An item corresponds to an entry in the hash table, a hash table element. + * + * On entry, gCacheMutex must be held. + */ int32_t _computeCountOfItemsToEvict() const; + + /** + * Run an eviction slice. + * On entry, gCacheMutex must be held. + * _runEvictionSlice runs a slice of the evict pipeline by examining the next + * 10 entries in the cache round robin style evicting them if they are eligible. + */ void _runEvictionSlice() const; - void _registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Register a master cache entry. A master key is the first key to create + * a given SharedObject value. Subsequent keys whose create function + * produce referneces to an already existing SharedObject are not masters - + * they can be evicted and subsequently recreated. + * + * On entry, gCacheMutex must be held. + * On exit, items in use count incremented, entry is marked as a master + * entry, and value registered with cache so that subsequent calls to + * addRef() and removeRef() on it correctly interact with the cache. 
+ */ + void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Store a value and creation error status in given hash entry. + * On entry, gCacheMutex must be held. Hash entry element must be in progress. + * value must be non NULL. + * On Exit, soft reference added to value. value and status stored in hash + * entry. Soft reference removed from previous stored value. Waiting + * threads notified. + */ void _put( const UHashElement *element, const SharedObject *value, const UErrorCode status) const; + /** + * Remove a soft reference, and delete the SharedObject if no references remain. + * To be used from within the UnifiedCache implementation only. + * gCacheMutex must be held by caller. + * @param value the SharedObject to be acted on. + */ + void removeSoftRef(const SharedObject *value) const; + + /** + * Increment the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between zero and one reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the addition. + */ + int32_t addHardRef(const SharedObject *value) const; + + /** + * Decrement the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between one and zero reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the removal. 
+ */ + int32_t removeHardRef(const SharedObject *value) const; + + #ifdef UNIFIED_CACHE_DEBUG void _dumpContents() const; #endif - static void copyPtr(const SharedObject *src, const SharedObject *&dest); - static void clearPtr(const SharedObject *&ptr); - static void _fetch( - const UHashElement *element, - const SharedObject *&value, - UErrorCode &status); - static UBool _inProgress(const UHashElement *element); - static UBool _inProgress( - const SharedObject *theValue, UErrorCode creationStatus); - static UBool _isEvictable(const UHashElement *element); + + /** + * Fetch value and error code from a particular hash entry. + * On entry, gCacheMutex must be held. value must be either NULL or must be + * included in the ref count of the object to which it points. + * On exit, value and status set to what is in the hash entry. Caller must + * eventually call removeRef on value. + * If hash entry is in progress, value will be set to gNoValue and status will + * be set to U_ZERO_ERROR. + */ + void _fetch(const UHashElement *element, const SharedObject *&value, + UErrorCode &status) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const UHashElement *element) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const; + + /** + * Determine if given hash entry is eligible for eviction. + * On entry, gCacheMutex must be held. 
+ */ + UBool _isEvictable(const UHashElement *element) const; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/uniset_closure.cpp b/deps/icu-small/source/common/uniset_closure.cpp index b5cc213941..97c7bc9d35 100644 --- a/deps/icu-small/source/common/uniset_closure.cpp +++ b/deps/icu-small/source/common/uniset_closure.cpp @@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, // _applyPattern calls add() etc., which set pat to empty. UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status); + applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status); if (U_FAILURE(status)) return *this; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index d0ed074a9b..ef5d6a32b2 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { ucase_addPropertyStarts(&sa, &status); break; case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status); + ubidi_addPropertyStarts(&sa, &status); break; default: status = U_INTERNAL_PROGRAM_ERROR; @@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { return i.fSet; } +namespace { // Cache some sets for other services -------------------------------------- *** void U_CALLCONV createUni32Set(UErrorCode &errorCode) { @@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) { // memory leak checker tools #define _dbgct(me) +} // namespace + //---------------------------------------------------------------- // Constructors &c //---------------------------------------------------------------- @@ -382,7 +385,7 
@@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, // _applyPattern calls add() etc., which set pat to empty. UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status); + applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status); if (U_FAILURE(status)) return; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); @@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { // Implementation: Pattern parsing //---------------------------------------------------------------- +namespace { + /** * A small all-inline class to manage a UnicodeSet pointer. Add * operator->() etc. as needed. @@ -424,6 +429,10 @@ public: } }; +constexpr int32_t MAX_DEPTH = 100; + +} // namespace + /** * Parse the pattern from the given RuleCharacterIterator. The * iterator is advanced over the parsed pattern. @@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, UErrorCode& ec) { if (U_FAILURE(ec)) return; + if (depth > MAX_DEPTH) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + return; + } // Syntax characters: [ ] ^ - & { } @@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, } switch (setMode) { case 1: - nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec); + nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); break; case 2: chars.skipIgnored(opts); @@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, // Property set implementation //---------------------------------------------------------------- +namespace { + static UBool numericValueFilter(UChar32 ch, void* context) { return u_getNumericValue(ch) == *(double*)context; } @@ -868,6 +884,8 @@ static 
UBool scriptExtensionsFilter(UChar32 ch, void* context) { return uscript_hasScript(ch, *(UScriptCode*)context); } +} // namespace + /** * Generic filter-based scanning code for UCD property UnicodeSets. */ @@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, } } +namespace { + static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { /* Note: we use ' ' in compiler code page */ int32_t j = 0; @@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { return TRUE; } +} // namespace + //---------------------------------------------------------------- // Property set API //---------------------------------------------------------------- diff --git a/deps/icu-small/source/common/uprops.cpp b/deps/icu-small/source/common/uprops.cpp index ace3c4d6d0..b76896db1b 100644 --- a/deps/icu-small/source/common/uprops.cpp +++ b/deps/icu-small/source/common/uprops.cpp @@ -38,8 +38,6 @@ U_NAMESPACE_USE -#define GET_BIDI_PROPS() ubidi_getSingleton() - /* general properties API functions ----------------------------------------- */ struct BinaryProperty; @@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 } static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isBidiControl(GET_BIDI_PROPS(), c); + return ubidi_isBidiControl(c); } static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isMirrored(GET_BIDI_PROPS(), c); + return ubidi_isMirrored(c); } static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isJoinControl(GET_BIDI_PROPS(), c); + return ubidi_isJoinControl(c); } #if UCONFIG_NO_NORMALIZATION @@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /* } static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) 
{ - return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); + return (int32_t)ubidi_getPairedBracketType(c); } static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { - return ubidi_getMaxValue(GET_BIDI_PROPS(), which); + return ubidi_getMaxValue(which); } #if UCONFIG_NO_NORMALIZATION @@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope } static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); + return ubidi_getJoiningGroup(c); } static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningType(GET_BIDI_PROPS(), c); + return ubidi_getJoiningType(c); } static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { diff --git a/deps/icu-small/source/common/ushape.cpp b/deps/icu-small/source/common/ushape.cpp index d7886ac06c..c3f3ef9e20 100644 --- a/deps/icu-small/source/common/ushape.cpp +++ b/deps/icu-small/source/common/ushape.cpp @@ -342,18 +342,16 @@ static void _shapeToArabicDigitsWithContext(UChar *s, int32_t length, UChar digitBase, UBool isLogical, UBool lastStrongWasAL) { - const UBiDiProps *bdp; int32_t i; UChar c; - bdp=ubidi_getSingleton(); digitBase-=0x30; /* the iteration direction depends on the type of input */ if(isLogical) { for(i=0; i<length; ++i) { c=s[i]; - switch(ubidi_getClass(bdp, c)) { + switch(ubidi_getClass(c)) { case U_LEFT_TO_RIGHT: /* L */ case U_RIGHT_TO_LEFT: /* R */ lastStrongWasAL=FALSE; @@ -373,7 +371,7 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length, } else { for(i=length; i>0; /* pre-decrement in the body */) { c=s[--i]; - switch(ubidi_getClass(bdp, c)) { + switch(ubidi_getClass(c)) { case U_LEFT_TO_RIGHT: /* L */ case U_RIGHT_TO_LEFT: /* R */ lastStrongWasAL=FALSE; diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp 
index c4f831be2e..54a77172fe 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -347,10 +347,6 @@ usprep_getProfile(const char* path, newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); - if(newProfile->checkBiDi) { - newProfile->bdp = ubidi_getSingleton(); - } - LocalMemory<UStringPrepKey> key; LocalMemory<char> keyName; LocalMemory<char> keyPath; @@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile, } if(profile->checkBiDi) { - direction = ubidi_getClass(profile->bdp, ch); + direction = ubidi_getClass(ch); if(firstCharDir == U_CHAR_DIRECTION_COUNT){ firstCharDir = direction; } diff --git a/deps/icu-small/source/common/ustr_wcs.cpp b/deps/icu-small/source/common/ustr_wcs.cpp index 8b6e99221e..0372824f21 100644 --- a/deps/icu-small/source/common/ustr_wcs.cpp +++ b/deps/icu-small/source/common/ustr_wcs.cpp @@ -342,7 +342,7 @@ _strFromWCS( UChar *dest, pSrcLimit = src + srcLength; for(;;){ - register int32_t nulLen = 0; + int32_t nulLen = 0; /* find nulls in the string */ while(nulLen<srcLength && pSrc[nulLen++]!=0){ diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp index b1beb34277..978bd3b7b8 100644 --- a/deps/icu-small/source/common/ustrcase.cpp +++ b/deps/icu-small/source/common/ustrcase.cpp @@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, return destIndex; } -} // namespace - -U_NAMESPACE_END - -U_NAMESPACE_USE - -/* string casing ------------------------------------------------------------ */ - /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. 
*/ -static inline int32_t +inline int32_t appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, int32_t result, const UChar *s, int32_t cpLength, uint32_t options, icu::Edits *edits) { @@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, return destIndex; } -static inline int32_t +inline int32_t appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { if(destIndex<destCapacity) { dest[destIndex]=c; @@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { return destIndex+1; } -static inline int32_t +int32_t +appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, + const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { + if(edits!=NULL) { + edits->addUnchanged(length); + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if((destIndex+length)<=destCapacity) { + u_memcpy(dest+destIndex, s, length); + } + return destIndex + length; +} + +inline int32_t appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { - if(length>0) { - if(edits!=NULL) { - edits->addUnchanged(length); - } - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } - if(length>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - if((destIndex+length)<=destCapacity) { - u_memcpy(dest+destIndex, s, length); - } - destIndex+=length; + if (length <= 0) { + return destIndex; } - return destIndex; + return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits); } -static UChar32 U_CALLCONV +UChar32 U_CALLCONV utf16_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; UChar32 c; @@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) { return U_SENTINEL; } -/* - * Case-maps 
[srcStart..srcLimit[ but takes - * context [0..srcLength[ into account. +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. */ -static int32_t -_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - icu::Edits *edits, - UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex=srcStart; - int32_t destIndex=0; - while(srcIndex<srcLimit) { - int32_t cpStart; - csc->cpStart=cpStart=srcIndex; +int32_t toLower(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases + UChar lead; + while (srcIndex < srcLimit) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += delta; + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, 
srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLimit) { + break; + } + // slow path + int32_t cpStart = srcIndex++; + UChar trail; UChar32 c; - U16_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; + if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } const UChar *s; - c=map(c, utf16_caseContextIterator, csc, &s, caseLocale); - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; + if (caseLocale >= 0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale); + } else { + c = ucase_toFullFolding(c, &s, options); } + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; } + return destIndex; +} +int32_t toUpper(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcLength, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { 
+ latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + UChar lead; + while (srcIndex < srcLength) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += delta; + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLength) { + break; + } + // slow path + int32_t cpStart; + csc->cpStart = cpStart = srcIndex++; + UChar trail; + UChar32 c; + if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, 
destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } return destIndex; } +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV @@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it if((options&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. */ destIndex+= - _caseMap( - caseLocale, options, ucase_toFullLower, + toLower( + caseLocale, options, dest+destIndex, destCapacity-destIndex, - src, &csc, - titleLimit, index, + src, &csc, titleLimit, index, edits, errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; @@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - int32_t destIndex = _caseMap( - caseLocale, options, ucase_toFullLower, + int32_t destIndex = toLower( + caseLocale, options, dest, destCapacity, src, &csc, 0, srcLength, edits, errorCode); @@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - destIndex = _caseMap( - caseLocale, options, ucase_toFullUpper, + destIndex = toUpper( + caseLocale, options, dest, destCapacity, - src, &csc, 0, srcLength, + src, &csc, srcLength, edits, errorCode); } return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); @@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK const UChar *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex = 0; - int32_t destIndex = 0; - while (srcIndex < srcLength) { - int32_t cpStart = srcIndex; - UChar32 c; - U16_NEXT(src, srcIndex, srcLength, c); - 
const UChar *s; - c = ucase_toFullFolding(c, &s, options); - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - + int32_t destIndex = toLower( + -1, options, + dest, destCapacity, + src, nullptr, 0, srcLength, + edits, errorCode); return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } diff --git a/deps/icu-small/source/common/utf_impl.cpp b/deps/icu-small/source/common/utf_impl.cpp index f78c566e09..9dd241a12b 100644 --- a/deps/icu-small/source/common/utf_impl.cpp +++ b/deps/icu-small/source/common/utf_impl.cpp @@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U int32_t i=*pi; if(U8_IS_TRAIL(c) && i>start) { uint8_t b1=s[--i]; - if(0xc2<=b1 && b1<0xe0) { - *pi=i; - return ((b1-0xc0)<<6)|(c&0x3f); + if(U8_IS_LEAD(b1)) { + if(b1<0xe0) { + *pi=i; + return ((b1-0xc0)<<6)|(c&0x3f); + } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) { + // Truncated 3- or 4-byte sequence. + *pi=i; + return errorValue(1, strict); + } } else if(U8_IS_TRAIL(b1) && i>start) { // Extract the value bits from the last trail byte. 
c&=0x3f; uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<0xf0) { - b2&=0xf; - if(strict!=-2) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - *pi=i; - c=(b2<<12)|((b1&0x3f)<<6)|c; - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - return c; - } else { - // strict: forbid non-characters like U+fffe - return errorValue(2, strict); + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0) { + b2&=0xf; + if(strict!=-2) { + if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + *pi=i; + c=(b2<<12)|((b1&0x3f)<<6)|c; + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + return c; + } else { + // strict: forbid non-characters like U+fffe + return errorValue(2, strict); + } + } + } else { + // strict=-2 -> lenient: allow surrogates + b1-=0x80; + if((b2>0 || b1>=0x20)) { + *pi=i; + return (b2<<12)|(b1<<6)|c; } } - } else { - // strict=-2 -> lenient: allow surrogates - b1-=0x80; - if((b2>0 || b1>=0x20)) { - *pi=i; - return (b2<<12)|(b1<<6)|c; - } + } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { + // Truncated 4-byte sequence. + *pi=i; + return errorValue(2, strict); } } else if(U8_IS_TRAIL(b2) && i>start) { uint8_t b3=s[--i]; @@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U } } } - } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. - *pi=i; - return errorValue(2, strict); } - } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) || - (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - // Truncated 3- or 4-byte sequence. - *pi=i; - return errorValue(1, strict); } } return errorValue(0, strict); @@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) { uint8_t c=s[i]; if(U8_IS_TRAIL(c) && i>start) { uint8_t b1=s[--i]; - if(0xc2<=b1 && b1<0xe0) { - return i; + if(U8_IS_LEAD(b1)) { + if(b1<0xe0 || + (b1<0xf0 ? 
U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) { + return i; + } } else if(U8_IS_TRAIL(b1) && i>start) { uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<0xf0) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { return i; } } else if(U8_IS_TRAIL(b2) && i>start) { uint8_t b3=s[--i]; - if(0xf0<=b3 && b3<=0xf4) { - if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { - return i; - } + if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { + return i; } - } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. - return i; } - } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) || - (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - // Truncated 3- or 4-byte sequence. - return i; } } return orig_i; diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h index 9c5382c594..641027a1a3 100644 --- a/deps/icu-small/source/common/utrie.h +++ b/deps/icu-small/source/common/utrie.h @@ -556,7 +556,7 @@ struct UNewTrie { * Index values at build-time are 32 bits wide for easier processing. * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()). 
*/ - int32_t index[UTRIE_MAX_INDEX_LENGTH]; + int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT]; uint32_t *data; uint32_t leadUnitValue; diff --git a/deps/icu-small/source/common/uts46.cpp b/deps/icu-small/source/common/uts46.cpp index 9b8d3ded2f..5a23572eb6 100644 --- a/deps/icu-small/source/common/uts46.cpp +++ b/deps/icu-small/source/common/uts46.cpp @@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) { UBool UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { - const UBiDiProps *bdp=ubidi_getSingleton(); // [IDNA2008-Tables] // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER for(int32_t i=0; i<labelLength; ++i) { @@ -1148,7 +1147,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { } // check precontext (Joining_Type:{L,D})(Joining_Type:T)* for(;;) { - UJoiningType type=ubidi_getJoiningType(bdp, c); + UJoiningType type=ubidi_getJoiningType(c); if(type==U_JT_TRANSPARENT) { if(j==0) { return FALSE; @@ -1166,7 +1165,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { return FALSE; } U16_NEXT_UNSAFE(label, j, c); - UJoiningType type=ubidi_getJoiningType(bdp, c); + UJoiningType type=ubidi_getJoiningType(c); if(type==U_JT_TRANSPARENT) { // just skip this character } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { diff --git a/deps/icu-small/source/common/utypes.cpp b/deps/icu-small/source/common/utypes.cpp index 8f5791be16..5d6a0504ba 100644 --- a/deps/icu-small/source/common/utypes.cpp +++ b/deps/icu-small/source/common/utypes.cpp @@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_UNDEFINED_KEYWORD", "U_DEFAULT_KEYWORD_MISSING", "U_DECIMAL_NUMBER_SYNTAX_ERROR", - "U_FORMAT_INEXACT_ERROR" + "U_FORMAT_INEXACT_ERROR", + "U_NUMBER_ARG_OUTOFBOUNDS_ERROR" }; static const char * const diff --git a/deps/icu-small/source/data/in/icudt60l.dat b/deps/icu-small/source/data/in/icudt61l.dat Binary files 
differindex c81ffccfa9..e9c24d8d1a 100644 --- a/deps/icu-small/source/data/in/icudt60l.dat +++ b/deps/icu-small/source/data/in/icudt61l.dat diff --git a/deps/icu-small/source/i18n/alphaindex.cpp b/deps/icu-small/source/i18n/alphaindex.cpp index 692f5809b6..f4a082c5b2 100644 --- a/deps/icu-small/source/i18n/alphaindex.cpp +++ b/deps/icu-small/source/i18n/alphaindex.cpp @@ -725,7 +725,7 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status } // question: should we add auxiliary exemplars? - if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { + if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) { exemplars.add(0x61, 0x7A); } if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables @@ -740,14 +740,9 @@ void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status // cut down to small list // make use of the fact that Ethiopic is allocated in 8's, where // the base is 0 mod 8. - UnicodeSet ethiopic( - UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); - UnicodeSetIterator it(ethiopic); - while (it.next() && !it.isString()) { - if ((it.getCodepoint() & 0x7) != 0) { - exemplars.remove(it.getCodepoint()); - } - } + UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status); + ethiopic.retainAll(exemplars); + exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic); } // Upper-case any that aren't already so. 
diff --git a/deps/icu-small/source/i18n/calendar.cpp b/deps/icu-small/source/i18n/calendar.cpp index 7ccaa43b82..61757cb250 100644 --- a/deps/icu-small/source/i18n/calendar.cpp +++ b/deps/icu-small/source/i18n/calendar.cpp @@ -3223,14 +3223,14 @@ int32_t Calendar::handleComputeJulianDay(UCalendarDateFields bestField) { bestField == UCAL_DAY_OF_WEEK_IN_MONTH); int32_t year; - if (bestField == UCAL_WEEK_OF_YEAR) { - year = internalGet(UCAL_YEAR_WOY, handleGetExtendedYear()); - internalSet(UCAL_EXTENDED_YEAR, year); + if (bestField == UCAL_WEEK_OF_YEAR && newerField(UCAL_YEAR_WOY, UCAL_YEAR) == UCAL_YEAR_WOY) { + year = internalGet(UCAL_YEAR_WOY); } else { year = handleGetExtendedYear(); - internalSet(UCAL_EXTENDED_YEAR, year); } + internalSet(UCAL_EXTENDED_YEAR, year); + #if defined (U_DEBUG_CAL) fprintf(stderr, "%s:%d: bestField= %s - y=%d\n", __FILE__, __LINE__, fldName(bestField), year); #endif diff --git a/deps/icu-small/source/i18n/collationiterator.h b/deps/icu-small/source/i18n/collationiterator.h index d0910ea57b..12e05b4482 100644 --- a/deps/icu-small/source/i18n/collationiterator.h +++ b/deps/icu-small/source/i18n/collationiterator.h @@ -34,12 +34,12 @@ class UVector32; // Export an explicit template instantiation of the MaybeStackArray that // is used as a data member of CEBuffer. // -// MSVC requires this, even though it should not be necessary. -// No direct access to the MaybeStackArray leaks out of the i18n library. +// When building DLLs for Windows this is required even though +// no direct access to the MaybeStackArray leaks out of the i18n library. // // See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. 
// -#if defined (_MSC_VER) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN template class U_I18N_API MaybeStackArray<int64_t, CEBUFFER_INITIAL_CAPACITY>; #endif diff --git a/deps/icu-small/source/i18n/dcfmtsym.cpp b/deps/icu-small/source/i18n/dcfmtsym.cpp index f840fde2ab..680c3120a1 100644 --- a/deps/icu-small/source/i18n/dcfmtsym.cpp +++ b/deps/icu-small/source/i18n/dcfmtsym.cpp @@ -38,6 +38,7 @@ #include "uresimp.h" #include "ureslocs.h" #include "charstr.h" +#include "uassert.h" // ***************************************************************************** // class DecimalFormatSymbols @@ -165,6 +166,7 @@ DecimalFormatSymbols::operator=(const DecimalFormatSymbols& rhs) uprv_strcpy(actualLocale, rhs.actualLocale); fIsCustomCurrencySymbol = rhs.fIsCustomCurrencySymbol; fIsCustomIntlCurrencySymbol = rhs.fIsCustomIntlCurrencySymbol; + fCodePointZero = rhs.fCodePointZero; } return *this; } @@ -196,6 +198,7 @@ DecimalFormatSymbols::operator==(const DecimalFormatSymbols& that) const return FALSE; } } + // No need to check fCodePointZero since it is based on fSymbols return locale == that.locale && uprv_strcmp(validLocale, that.validLocale) == 0 && uprv_strcmp(actualLocale, that.actualLocale) == 0; @@ -433,6 +436,24 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status, // Let the monetary number separators equal the default number separators if necessary. sink.resolveMissingMonetarySeparators(fSymbols); + // Resolve codePointZero + UChar32 tempCodePointZero; + for (int32_t i=0; i<=9; i++) { + const UnicodeString& stringDigit = getConstDigitSymbol(i); + if (stringDigit.countChar32() != 1) { + tempCodePointZero = -1; + break; + } + UChar32 cp = stringDigit.char32At(0); + if (i == 0) { + tempCodePointZero = cp; + } else if (cp != tempCodePointZero + i) { + tempCodePointZero = -1; + break; + } + } + fCodePointZero = tempCodePointZero; + // Obtain currency data from the currency API. 
This is strictly // for backward compatibility; we don't use DecimalFormatSymbols // for currency data anymore. @@ -530,6 +551,8 @@ DecimalFormatSymbols::initialize() { fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents fIsCustomCurrencySymbol = FALSE; fIsCustomIntlCurrencySymbol = FALSE; + fCodePointZero = 0x30; + U_ASSERT(fCodePointZero == fSymbols[kZeroDigitSymbol].char32At(0)); } diff --git a/deps/icu-small/source/i18n/decNumber.cpp b/deps/icu-small/source/i18n/decNumber.cpp index 363f93ea72..c19493bdf3 100644 --- a/deps/icu-small/source/i18n/decNumber.cpp +++ b/deps/icu-small/source/i18n/decNumber.cpp @@ -627,10 +627,12 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *dn, const char for (; *c=='0' && *(c+1)!='\0';) c++; /* strip insignificant zeros */ firstexp=c; /* save exponent digit place */ + uInt uexponent = 0; /* Avoid undefined behavior on signed int overflow */ for (; ;c++) { if (*c<'0' || *c>'9') break; /* not a digit */ - exponent=X10(exponent)+(Int)*c-(Int)'0'; + uexponent=X10(uexponent)+(uInt)*c-(uInt)'0'; } /* c */ + exponent = (Int)uexponent; /* if not now on a '\0', *c must not be a digit */ if (*c!='\0') break; diff --git a/deps/icu-small/source/i18n/decimalformatpattern.cpp b/deps/icu-small/source/i18n/decimalformatpattern.cpp index c7ec5cd966..80a1870f33 100644 --- a/deps/icu-small/source/i18n/decimalformatpattern.cpp +++ b/deps/icu-small/source/i18n/decimalformatpattern.cpp @@ -50,10 +50,12 @@ static void syntaxError(const UnicodeString& pattern, parseError.preContext[stop-start] = 0; //for post-context - start = pos+1; - stop = ((pos+U_PARSE_CONTEXT_LEN)<=pattern.length()) ? 
(pos+(U_PARSE_CONTEXT_LEN-1)) : - pattern.length(); - pattern.extract(start,stop-start,parseError.postContext,0); + start = pattern.moveIndex32(pos, 1); + stop = pos + U_PARSE_CONTEXT_LEN - 1; + if (stop > pattern.length()) { + stop = pattern.length(); + } + pattern.extract(start, stop - start, parseError.postContext, 0); //null terminate the buffer parseError.postContext[stop-start]= 0; } diff --git a/deps/icu-small/source/i18n/digitlst.cpp b/deps/icu-small/source/i18n/digitlst.cpp index 10a3a5dca1..37760defd7 100644 --- a/deps/icu-small/source/i18n/digitlst.cpp +++ b/deps/icu-small/source/i18n/digitlst.cpp @@ -44,12 +44,15 @@ #include "digitinterval.h" #include "ucln_in.h" #include "umutex.h" +#include "double-conversion.h" #include <stdlib.h> #include <limits.h> #include <string.h> #include <stdio.h> #include <limits> +using icu::double_conversion::DoubleToStringConverter; + #if !defined(U_USE_STRTOD_L) # if U_PLATFORM_USES_ONLY_WIN32_API # define U_USE_STRTOD_L 1 @@ -850,8 +853,53 @@ DigitList::set(double source) } else { uprv_strcpy(rep,"inf"); } + } else if (uprv_isNaN(source)) { + uprv_strcpy(rep, "NaN"); } else { - sprintf(rep, "%+1.*e", MAX_DBL_DIGITS - 1, source); + bool sign; + int32_t length; + int32_t point; + DoubleToStringConverter::DoubleToAscii( + source, + DoubleToStringConverter::DtoaMode::SHORTEST, + 0, + rep + 1, + sizeof(rep), + &sign, + &length, + &point + ); + + // Convert the raw buffer into a string for decNumber + int32_t power = point - length; + if (sign) { + rep[0] = '-'; + } else { + rep[0] = '0'; + } + length++; + rep[length++] = 'E'; + if (power < 0) { + rep[length++] = '-'; + power = -power; + } else { + rep[length++] = '+'; + } + if (power < 10) { + rep[length++] = power + '0'; + } else if (power < 100) { + rep[length++] = (power / 10) + '0'; + rep[length++] = (power % 10) + '0'; + } else { + U_ASSERT(power < 1000); + rep[length + 2] = (power % 10) + '0'; + power /= 10; + rep[length + 1] = (power % 10) + '0'; + power /= 10; + 
rep[length] = power + '0'; + length += 3; + } + rep[length++] = 0; } U_ASSERT(uprv_strlen(rep) < sizeof(rep)); diff --git a/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp new file mode 100644 index 0000000000..07d0b0eb0f --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp @@ -0,0 +1,659 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#include <math.h> + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-bignum-dtoa.h" + +#include "double-conversion-bignum.h" +#include "double-conversion-ieee.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +static int NormalizedExponent(uint64_t significand, int exponent) { + ASSERT(significand != 0); + while ((significand & Double::kHiddenBit) == 0) { + significand = significand << 1; + exponent = exponent - 1; + } + return exponent; +} + + +// Forward declarations: +// Returns an estimation of k such that 10^(k-1) <= v < 10^k. +static int EstimatePower(int exponent); +// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator +// and denominator. +static void InitialScaledStartValues(uint64_t significand, + int exponent, + bool lower_boundary_is_closer, + int estimated_power, + bool need_boundary_deltas, + Bignum* numerator, + Bignum* denominator, + Bignum* delta_minus, + Bignum* delta_plus); +// Multiplies numerator/denominator so that its values lies in the range 1-10. +// Returns decimal_point s.t. +// v = numerator'/denominator' * 10^(decimal_point-1) +// where numerator' and denominator' are the values of numerator and +// denominator after the call to this function. 
+static void FixupMultiply10(int estimated_power, bool is_even, + int* decimal_point, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus); +// Generates digits from the left to the right and stops when the generated +// digits yield the shortest decimal representation of v. +static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus, + bool is_even, + Vector<char> buffer, int* length); +// Generates 'requested_digits' after the decimal point. +static void BignumToFixed(int requested_digits, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector<char>(buffer), int* length); +// Generates 'count' digits of numerator/denominator. +// Once 'count' digits have been produced rounds the result depending on the +// remainder (remainders of exactly .5 round upwards). Might update the +// decimal_point when rounding up (for example for 0.9999). +static void GenerateCountedDigits(int count, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector<char>(buffer), int* length); + + +void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, + Vector<char> buffer, int* length, int* decimal_point) { + ASSERT(v > 0); + ASSERT(!Double(v).IsSpecial()); + uint64_t significand; + int exponent; + bool lower_boundary_is_closer; + if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) { + float f = static_cast<float>(v); + ASSERT(f == v); + significand = Single(f).Significand(); + exponent = Single(f).Exponent(); + lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser(); + } else { + significand = Double(v).Significand(); + exponent = Double(v).Exponent(); + lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser(); + } + bool need_boundary_deltas = + (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE); + + bool is_even = (significand & 1) == 0; + int normalized_exponent = NormalizedExponent(significand, exponent); + // estimated_power might be too 
low by 1. + int estimated_power = EstimatePower(normalized_exponent); + + // Shortcut for Fixed. + // The requested digits correspond to the digits after the point. If the + // number is much too small, then there is no need in trying to get any + // digits. + if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) { + buffer[0] = '\0'; + *length = 0; + // Set decimal-point to -requested_digits. This is what Gay does. + // Note that it should not have any effect anyways since the string is + // empty. + *decimal_point = -requested_digits; + return; + } + + Bignum numerator; + Bignum denominator; + Bignum delta_minus; + Bignum delta_plus; + // Make sure the bignum can grow large enough. The smallest double equals + // 4e-324. In this case the denominator needs fewer than 324*4 binary digits. + // The maximum double is 1.7976931348623157e308 which needs fewer than + // 308*4 binary digits. + ASSERT(Bignum::kMaxSignificantBits >= 324*4); + InitialScaledStartValues(significand, exponent, lower_boundary_is_closer, + estimated_power, need_boundary_deltas, + &numerator, &denominator, + &delta_minus, &delta_plus); + // We now have v = (numerator / denominator) * 10^estimated_power. 
+ FixupMultiply10(estimated_power, is_even, decimal_point, + &numerator, &denominator, + &delta_minus, &delta_plus); + // We now have v = (numerator / denominator) * 10^(decimal_point-1), and + // 1 <= (numerator + delta_plus) / denominator < 10 + switch (mode) { + case BIGNUM_DTOA_SHORTEST: + case BIGNUM_DTOA_SHORTEST_SINGLE: + GenerateShortestDigits(&numerator, &denominator, + &delta_minus, &delta_plus, + is_even, buffer, length); + break; + case BIGNUM_DTOA_FIXED: + BignumToFixed(requested_digits, decimal_point, + &numerator, &denominator, + buffer, length); + break; + case BIGNUM_DTOA_PRECISION: + GenerateCountedDigits(requested_digits, decimal_point, + &numerator, &denominator, + buffer, length); + break; + default: + UNREACHABLE(); + } + buffer[*length] = '\0'; +} + + +// The procedure starts generating digits from the left to the right and stops +// when the generated digits yield the shortest decimal representation of v. A +// decimal representation of v is a number lying closer to v than to any other +// double, so it converts to v when read. +// +// This is true if d, the decimal representation, is between m- and m+, the +// upper and lower boundaries. d must be strictly between them if !is_even. +// m- := (numerator - delta_minus) / denominator +// m+ := (numerator + delta_plus) / denominator +// +// Precondition: 0 <= (numerator+delta_plus) / denominator < 10. +// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit +// will be produced. This should be the standard precondition. +static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus, + bool is_even, + Vector<char> buffer, int* length) { + // Small optimization: if delta_minus and delta_plus are the same just reuse + // one of the two bignums. 
+ if (Bignum::Equal(*delta_minus, *delta_plus)) { + delta_plus = delta_minus; + } + *length = 0; + for (;;) { + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. + // digit = numerator / denominator (integer division). + // numerator = numerator % denominator. + buffer[(*length)++] = static_cast<char>(digit + '0'); + + // Can we stop already? + // If the remainder of the division is less than the distance to the lower + // boundary we can stop. In this case we simply round down (discarding the + // remainder). + // Similarly we test if we can round up (using the upper boundary). + bool in_delta_room_minus; + bool in_delta_room_plus; + if (is_even) { + in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus); + } else { + in_delta_room_minus = Bignum::Less(*numerator, *delta_minus); + } + if (is_even) { + in_delta_room_plus = + Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; + } else { + in_delta_room_plus = + Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; + } + if (!in_delta_room_minus && !in_delta_room_plus) { + // Prepare for next iteration. + numerator->Times10(); + delta_minus->Times10(); + // We optimized delta_plus to be equal to delta_minus (if they share the + // same value). So don't multiply delta_plus if they point to the same + // object. + if (delta_minus != delta_plus) { + delta_plus->Times10(); + } + } else if (in_delta_room_minus && in_delta_room_plus) { + // Let's see if 2*numerator < denominator. + // If yes, then the next digit would be < 5 and we can round down. + int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator); + if (compare < 0) { + // Remaining digits are less than .5. -> Round down (== do nothing). + } else if (compare > 0) { + // Remaining digits are more than .5 of denominator. -> Round up. 
+ // Note that the last digit could not be a '9' as otherwise the whole + // loop would have stopped earlier. + // We still have an assert here in case the preconditions were not + // satisfied. + ASSERT(buffer[(*length) - 1] != '9'); + buffer[(*length) - 1]++; + } else { + // Halfway case. + // TODO(floitsch): need a way to solve half-way cases. + // For now let's round towards even (since this is what Gay seems to + // do). + + if ((buffer[(*length) - 1] - '0') % 2 == 0) { + // Round down => Do nothing. + } else { + ASSERT(buffer[(*length) - 1] != '9'); + buffer[(*length) - 1]++; + } + } + return; + } else if (in_delta_room_minus) { + // Round down (== do nothing). + return; + } else { // in_delta_room_plus + // Round up. + // Note again that the last digit could not be '9' since this would have + // stopped the loop earlier. + // We still have an ASSERT here, in case the preconditions were not + // satisfied. + ASSERT(buffer[(*length) -1] != '9'); + buffer[(*length) - 1]++; + return; + } + } +} + + +// Let v = numerator / denominator < 10. +// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) +// from left to right. Once 'count' digits have been produced we decide wether +// to round up or down. Remainders of exactly .5 round upwards. Numbers such +// as 9.999999 propagate a carry all the way, and change the +// exponent (decimal_point), when rounding upwards. +static void GenerateCountedDigits(int count, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector<char> buffer, int* length) { + ASSERT(count >= 0); + for (int i = 0; i < count - 1; ++i) { + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. + // digit = numerator / denominator (integer division). + // numerator = numerator % denominator. + buffer[i] = static_cast<char>(digit + '0'); + // Prepare for next iteration. 
+ numerator->Times10(); + } + // Generate the last digit. + uint16_t digit; + digit = numerator->DivideModuloIntBignum(*denominator); + if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { + digit++; + } + ASSERT(digit <= 10); + buffer[count - 1] = static_cast<char>(digit + '0'); + // Correct bad digits (in case we had a sequence of '9's). Propagate the + // carry until we hat a non-'9' or til we reach the first digit. + for (int i = count - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) break; + buffer[i] = '0'; + buffer[i - 1]++; + } + if (buffer[0] == '0' + 10) { + // Propagate a carry past the top place. + buffer[0] = '1'; + (*decimal_point)++; + } + *length = count; +} + + +// Generates 'requested_digits' after the decimal point. It might omit +// trailing '0's. If the input number is too small then no digits at all are +// generated (ex.: 2 fixed digits for 0.00001). +// +// Input verifies: 1 <= (numerator + delta) / denominator < 10. +static void BignumToFixed(int requested_digits, int* decimal_point, + Bignum* numerator, Bignum* denominator, + Vector<char>(buffer), int* length) { + // Note that we have to look at more than just the requested_digits, since + // a number could be rounded up. Example: v=0.5 with requested_digits=0. + // Even though the power of v equals 0 we can't just stop here. + if (-(*decimal_point) > requested_digits) { + // The number is definitively too small. + // Ex: 0.001 with requested_digits == 1. + // Set decimal-point to -requested_digits. This is what Gay does. + // Note that it should not have any effect anyways since the string is + // empty. + *decimal_point = -requested_digits; + *length = 0; + return; + } else if (-(*decimal_point) == requested_digits) { + // We only need to verify if the number rounds down or up. + // Ex: 0.04 and 0.06 with requested_digits == 1. + ASSERT(*decimal_point == -requested_digits); + // Initially the fraction lies in range (1, 10]. 
Multiply the denominator + // by 10 so that we can compare more easily. + denominator->Times10(); + if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { + // If the fraction is >= 0.5 then we have to include the rounded + // digit. + buffer[0] = '1'; + *length = 1; + (*decimal_point)++; + } else { + // Note that we caught most of similar cases earlier. + *length = 0; + } + return; + } else { + // The requested digits correspond to the digits after the point. + // The variable 'needed_digits' includes the digits before the point. + int needed_digits = (*decimal_point) + requested_digits; + GenerateCountedDigits(needed_digits, decimal_point, + numerator, denominator, + buffer, length); + } +} + + +// Returns an estimation of k such that 10^(k-1) <= v < 10^k where +// v = f * 2^exponent and 2^52 <= f < 2^53. +// v is hence a normalized double with the given exponent. The output is an +// approximation for the exponent of the decimal approimation .digits * 10^k. +// +// The result might undershoot by 1 in which case 10^k <= v < 10^k+1. +// Note: this property holds for v's upper boundary m+ too. +// 10^k <= m+ < 10^k+1. +// (see explanation below). +// +// Examples: +// EstimatePower(0) => 16 +// EstimatePower(-52) => 0 +// +// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0. +static int EstimatePower(int exponent) { + // This function estimates log10 of v where v = f*2^e (with e == exponent). + // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)). + // Note that f is bounded by its container size. Let p = 53 (the double's + // significand size). Then 2^(p-1) <= f < 2^p. + // + // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close + // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)). + // The computed number undershoots by less than 0.631 (when we compute log3 + // and not log10). 
+ // + // Optimization: since we only need an approximated result this computation + // can be performed on 64 bit integers. On x86/x64 architecture the speedup is + // not really measurable, though. + // + // Since we want to avoid overshooting we decrement by 1e10 so that + // floating-point imprecisions don't affect us. + // + // Explanation for v's boundary m+: the computation takes advantage of + // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement + // (even for denormals where the delta can be much more important). + + const double k1Log10 = 0.30102999566398114; // 1/lg(10) + + // For doubles len(f) == 53 (don't forget the hidden bit). + const int kSignificandSize = Double::kSignificandSize; + double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10); + return static_cast<int>(estimate); +} + + +// See comments for InitialScaledStartValues. +static void InitialScaledStartValuesPositiveExponent( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // A positive exponent implies a positive power. + ASSERT(estimated_power >= 0); + // Since the estimated_power is positive we simply multiply the denominator + // by 10^estimated_power. + + // numerator = v. + numerator->AssignUInt64(significand); + numerator->ShiftLeft(exponent); + // denominator = 10^estimated_power. + denominator->AssignPowerUInt16(10, estimated_power); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + denominator->ShiftLeft(1); + numerator->ShiftLeft(1); + // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common + // denominator (of 2) delta_plus equals 2^e. + delta_plus->AssignUInt16(1); + delta_plus->ShiftLeft(exponent); + // Same for delta_minus. The adjustments if f == 2^p-1 are done later. 
+ delta_minus->AssignUInt16(1); + delta_minus->ShiftLeft(exponent); + } +} + + +// See comments for InitialScaledStartValues +static void InitialScaledStartValuesNegativeExponentPositivePower( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // v = f * 2^e with e < 0, and with estimated_power >= 0. + // This means that e is close to 0 (have a look at how estimated_power is + // computed). + + // numerator = significand + // since v = significand * 2^exponent this is equivalent to + // numerator = v * / 2^-exponent + numerator->AssignUInt64(significand); + // denominator = 10^estimated_power * 2^-exponent (with exponent < 0) + denominator->AssignPowerUInt16(10, estimated_power); + denominator->ShiftLeft(-exponent); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + denominator->ShiftLeft(1); + numerator->ShiftLeft(1); + // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common + // denominator (of 2) delta_plus equals 2^e. + // Given that the denominator already includes v's exponent the distance + // to the boundaries is simply 1. + delta_plus->AssignUInt16(1); + // Same for delta_minus. The adjustments if f == 2^p-1 are done later. + delta_minus->AssignUInt16(1); + } +} + + +// See comments for InitialScaledStartValues +static void InitialScaledStartValuesNegativeExponentNegativePower( + uint64_t significand, int exponent, + int estimated_power, bool need_boundary_deltas, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + // Instead of multiplying the denominator with 10^estimated_power we + // multiply all values (numerator and deltas) by 10^-estimated_power. + + // Use numerator as temporary container for power_ten. 
+ Bignum* power_ten = numerator; + power_ten->AssignPowerUInt16(10, -estimated_power); + + if (need_boundary_deltas) { + // Since power_ten == numerator we must make a copy of 10^estimated_power + // before we complete the computation of the numerator. + // delta_plus = delta_minus = 10^estimated_power + delta_plus->AssignBignum(*power_ten); + delta_minus->AssignBignum(*power_ten); + } + + // numerator = significand * 2 * 10^-estimated_power + // since v = significand * 2^exponent this is equivalent to + // numerator = v * 10^-estimated_power * 2 * 2^-exponent. + // Remember: numerator has been abused as power_ten. So no need to assign it + // to itself. + ASSERT(numerator == power_ten); + numerator->MultiplyByUInt64(significand); + + // denominator = 2 * 2^-exponent with exponent < 0. + denominator->AssignUInt16(1); + denominator->ShiftLeft(-exponent); + + if (need_boundary_deltas) { + // Introduce a common denominator so that the deltas to the boundaries are + // integers. + numerator->ShiftLeft(1); + denominator->ShiftLeft(1); + // With this shift the boundaries have their correct value, since + // delta_plus = 10^-estimated_power, and + // delta_minus = 10^-estimated_power. + // These assignments have been done earlier. + // The adjustments if f == 2^p-1 (lower boundary is closer) are done later. + } +} + + +// Let v = significand * 2^exponent. +// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator +// and denominator. The functions GenerateShortestDigits and +// GenerateCountedDigits will then convert this ratio to its decimal +// representation d, with the required accuracy. +// Then d * 10^estimated_power is the representation of v. +// (Note: the fraction and the estimated_power might get adjusted before +// generating the decimal representation.) +// +// The initial start values consist of: +// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power. +// - a scaled (common) denominator. 
+// optionally (used by GenerateShortestDigits to decide if it has the shortest +// decimal converting back to v): +// - v - m-: the distance to the lower boundary. +// - m+ - v: the distance to the upper boundary. +// +// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator. +// +// Let ep == estimated_power, then the returned values will satisfy: +// v / 10^ep = numerator / denominator. +// v's boundarys m- and m+: +// m- / 10^ep == v / 10^ep - delta_minus / denominator +// m+ / 10^ep == v / 10^ep + delta_plus / denominator +// Or in other words: +// m- == v - delta_minus * 10^ep / denominator; +// m+ == v + delta_plus * 10^ep / denominator; +// +// Since 10^(k-1) <= v < 10^k (with k == estimated_power) +// or 10^k <= v < 10^(k+1) +// we then have 0.1 <= numerator/denominator < 1 +// or 1 <= numerator/denominator < 10 +// +// It is then easy to kickstart the digit-generation routine. +// +// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST +// or BIGNUM_DTOA_SHORTEST_SINGLE. 
+ +static void InitialScaledStartValues(uint64_t significand, + int exponent, + bool lower_boundary_is_closer, + int estimated_power, + bool need_boundary_deltas, + Bignum* numerator, + Bignum* denominator, + Bignum* delta_minus, + Bignum* delta_plus) { + if (exponent >= 0) { + InitialScaledStartValuesPositiveExponent( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } else if (estimated_power >= 0) { + InitialScaledStartValuesNegativeExponentPositivePower( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } else { + InitialScaledStartValuesNegativeExponentNegativePower( + significand, exponent, estimated_power, need_boundary_deltas, + numerator, denominator, delta_minus, delta_plus); + } + + if (need_boundary_deltas && lower_boundary_is_closer) { + // The lower boundary is closer at half the distance of "normal" numbers. + // Increase the common denominator and adapt all but the delta_minus. + denominator->ShiftLeft(1); // *2 + numerator->ShiftLeft(1); // *2 + delta_plus->ShiftLeft(1); // *2 + } +} + + +// This routine multiplies numerator/denominator so that its values lies in the +// range 1-10. That is after a call to this function we have: +// 1 <= (numerator + delta_plus) /denominator < 10. +// Let numerator the input before modification and numerator' the argument +// after modification, then the output-parameter decimal_point is such that +// numerator / denominator * 10^estimated_power == +// numerator' / denominator' * 10^(decimal_point - 1) +// In some cases estimated_power was too low, and this is already the case. We +// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k == +// estimated_power) but do not touch the numerator or denominator. +// Otherwise the routine multiplies the numerator and the deltas by 10. 
+static void FixupMultiply10(int estimated_power, bool is_even, + int* decimal_point, + Bignum* numerator, Bignum* denominator, + Bignum* delta_minus, Bignum* delta_plus) { + bool in_range; + if (is_even) { + // For IEEE doubles half-way cases (in decimal system numbers ending with 5) + // are rounded to the closest floating-point number with even significand. + in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; + } else { + in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; + } + if (in_range) { + // Since numerator + delta_plus >= denominator we already have + // 1 <= numerator/denominator < 10. Simply update the estimated_power. + *decimal_point = estimated_power + 1; + } else { + *decimal_point = estimated_power; + numerator->Times10(); + if (Bignum::Equal(*delta_minus, *delta_plus)) { + delta_minus->Times10(); + delta_plus->AssignBignum(*delta_minus); + } else { + delta_minus->Times10(); + delta_plus->Times10(); + } + } +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.h b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.h new file mode 100644 index 0000000000..edc21b0f2e --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.h @@ -0,0 +1,102 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_
#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_

// ICU PATCH: Customize header file paths for ICU.

#include "double-conversion-utils.h"

// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

namespace double_conversion {

// Selects which digits BignumDtoa produces for a given input.
enum BignumDtoaMode {
  // Return the shortest correct representation.
  // For example the output of 0.299999999999999988897 is (the less accurate
  // but correct) 0.3.
  BIGNUM_DTOA_SHORTEST,
  // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats.
  BIGNUM_DTOA_SHORTEST_SINGLE,
  // Return a fixed number of digits after the decimal point.
  // For instance fixed(0.1, 4) becomes 0.1000
  // If the input number is big, the output will be big.
  BIGNUM_DTOA_FIXED,
  // Return a fixed number of digits, no matter what the exponent is.
  BIGNUM_DTOA_PRECISION
};

// Converts the given double 'v' to ascii.
// The result should be interpreted as buffer * 10^(point-length).
// The buffer will be null-terminated.
//
// The input v must be > 0 and different from NaN, and Infinity.
//
// The output depends on the given mode:
//  - SHORTEST: produce the least amount of digits for which the internal
//   identity requirement is still satisfied. If the digits are printed
//   (together with the correct exponent) then reading this number will give
//   'v' again. The buffer will choose the representation that is closest to
//   'v'. If there are two at the same distance, then the number is rounded up.
//   In this mode the 'requested_digits' parameter is ignored.
//  - FIXED: produces digits necessary to print a given number with
//   'requested_digits' digits after the decimal point. The produced digits
//   might be too short in which case the caller has to fill the gaps with '0's.
//   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
//   Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns
//     buffer="2", point=0.
//   Note: the length of the returned buffer has no meaning wrt the significance
//   of its digits. That is, just because it contains '0's does not mean that
//   any other digit would not satisfy the internal identity requirement.
//  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
//   Even though the length of produced digits usually equals
//   'requested_digits', the function is allowed to return fewer digits, in
//   which case the caller has to fill the missing digits with '0's.
//   Halfway cases are again rounded up.
// 'BignumDtoa' expects the given buffer to be big enough to hold all digits
// and a terminating null-character.
//
// NOTE(review): 'length' and 'point' are output parameters; 'length' is
// presumably the number of digits written to 'buffer' -- confirm against the
// implementation.
void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
                Vector<char> buffer, int* length, int* point);

}  // namespace double_conversion

// ICU PATCH: Close ICU namespace
U_NAMESPACE_END

#endif  // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING
diff --git a/deps/icu-small/source/i18n/double-conversion-bignum.cpp b/deps/icu-small/source/i18n/double-conversion-bignum.cpp
new file mode 100644
index 0000000000..d5682af35f
--- /dev/null
+++ b/deps/icu-small/source/i18n/double-conversion-bignum.cpp
@@ -0,0 +1,784 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the double-conversion library. Original license:
//
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-bignum.h" +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +Bignum::Bignum() + : bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) { + for (int i = 0; i < kBigitCapacity; ++i) { + bigits_[i] = 0; + } +} + + +template<typename S> +static int BitSize(S value) { + (void) value; // Mark variable as used. + return 8 * sizeof(value); +} + +// Guaranteed to lie in one Bigit. +void Bignum::AssignUInt16(uint16_t value) { + ASSERT(kBigitSize >= BitSize(value)); + Zero(); + if (value == 0) return; + + EnsureCapacity(1); + bigits_[0] = value; + used_digits_ = 1; +} + + +void Bignum::AssignUInt64(uint64_t value) { + const int kUInt64Size = 64; + + Zero(); + if (value == 0) return; + + int needed_bigits = kUInt64Size / kBigitSize + 1; + EnsureCapacity(needed_bigits); + for (int i = 0; i < needed_bigits; ++i) { + bigits_[i] = value & kBigitMask; + value = value >> kBigitSize; + } + used_digits_ = needed_bigits; + Clamp(); +} + + +void Bignum::AssignBignum(const Bignum& other) { + exponent_ = other.exponent_; + for (int i = 0; i < other.used_digits_; ++i) { + bigits_[i] = other.bigits_[i]; + } + // Clear the excess digits (if there were any). 
+ for (int i = other.used_digits_; i < used_digits_; ++i) { + bigits_[i] = 0; + } + used_digits_ = other.used_digits_; +} + + +static uint64_t ReadUInt64(Vector<const char> buffer, + int from, + int digits_to_read) { + uint64_t result = 0; + for (int i = from; i < from + digits_to_read; ++i) { + int digit = buffer[i] - '0'; + ASSERT(0 <= digit && digit <= 9); + result = result * 10 + digit; + } + return result; +} + + +void Bignum::AssignDecimalString(Vector<const char> value) { + // 2^64 = 18446744073709551616 > 10^19 + const int kMaxUint64DecimalDigits = 19; + Zero(); + int length = value.length(); + unsigned int pos = 0; + // Let's just say that each digit needs 4 bits. + while (length >= kMaxUint64DecimalDigits) { + uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits); + pos += kMaxUint64DecimalDigits; + length -= kMaxUint64DecimalDigits; + MultiplyByPowerOfTen(kMaxUint64DecimalDigits); + AddUInt64(digits); + } + uint64_t digits = ReadUInt64(value, pos, length); + MultiplyByPowerOfTen(length); + AddUInt64(digits); + Clamp(); +} + + +static int HexCharValue(char c) { + if ('0' <= c && c <= '9') return c - '0'; + if ('a' <= c && c <= 'f') return 10 + c - 'a'; + ASSERT('A' <= c && c <= 'F'); + return 10 + c - 'A'; +} + + +void Bignum::AssignHexString(Vector<const char> value) { + Zero(); + int length = value.length(); + + int needed_bigits = length * 4 / kBigitSize + 1; + EnsureCapacity(needed_bigits); + int string_index = length - 1; + for (int i = 0; i < needed_bigits - 1; ++i) { + // These bigits are guaranteed to be "full". 
+ Chunk current_bigit = 0; + for (int j = 0; j < kBigitSize / 4; j++) { + current_bigit += HexCharValue(value[string_index--]) << (j * 4); + } + bigits_[i] = current_bigit; + } + used_digits_ = needed_bigits - 1; + + Chunk most_significant_bigit = 0; // Could be = 0; + for (int j = 0; j <= string_index; ++j) { + most_significant_bigit <<= 4; + most_significant_bigit += HexCharValue(value[j]); + } + if (most_significant_bigit != 0) { + bigits_[used_digits_] = most_significant_bigit; + used_digits_++; + } + Clamp(); +} + + +void Bignum::AddUInt64(uint64_t operand) { + if (operand == 0) return; + Bignum other; + other.AssignUInt64(operand); + AddBignum(other); +} + + +void Bignum::AddBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + + // If this has a greater exponent than other append zero-bigits to this. + // After this call exponent_ <= other.exponent_. + Align(other); + + // There are two possibilities: + // aaaaaaaaaaa 0000 (where the 0s represent a's exponent) + // bbbbb 00000000 + // ---------------- + // ccccccccccc 0000 + // or + // aaaaaaaaaa 0000 + // bbbbbbbbb 0000000 + // ----------------- + // cccccccccccc 0000 + // In both cases we might need a carry bigit. + + EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_); + Chunk carry = 0; + int bigit_pos = other.exponent_ - exponent_; + ASSERT(bigit_pos >= 0); + for (int i = 0; i < other.used_digits_; ++i) { + Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry; + bigits_[bigit_pos] = sum & kBigitMask; + carry = sum >> kBigitSize; + bigit_pos++; + } + + while (carry != 0) { + Chunk sum = bigits_[bigit_pos] + carry; + bigits_[bigit_pos] = sum & kBigitMask; + carry = sum >> kBigitSize; + bigit_pos++; + } + used_digits_ = Max(bigit_pos, used_digits_); + ASSERT(IsClamped()); +} + + +void Bignum::SubtractBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + // We require this to be bigger than other. 
+ ASSERT(LessEqual(other, *this)); + + Align(other); + + int offset = other.exponent_ - exponent_; + Chunk borrow = 0; + int i; + for (i = 0; i < other.used_digits_; ++i) { + ASSERT((borrow == 0) || (borrow == 1)); + Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow; + bigits_[i + offset] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + } + while (borrow != 0) { + Chunk difference = bigits_[i + offset] - borrow; + bigits_[i + offset] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + ++i; + } + Clamp(); +} + + +void Bignum::ShiftLeft(int shift_amount) { + if (used_digits_ == 0) return; + exponent_ += shift_amount / kBigitSize; + int local_shift = shift_amount % kBigitSize; + EnsureCapacity(used_digits_ + 1); + BigitsShiftLeft(local_shift); +} + + +void Bignum::MultiplyByUInt32(uint32_t factor) { + if (factor == 1) return; + if (factor == 0) { + Zero(); + return; + } + if (used_digits_ == 0) return; + + // The product of a bigit with the factor is of size kBigitSize + 32. + // Assert that this number + 1 (for the carry) fits into double chunk. 
+ ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1); + DoubleChunk carry = 0; + for (int i = 0; i < used_digits_; ++i) { + DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry; + bigits_[i] = static_cast<Chunk>(product & kBigitMask); + carry = (product >> kBigitSize); + } + while (carry != 0) { + EnsureCapacity(used_digits_ + 1); + bigits_[used_digits_] = carry & kBigitMask; + used_digits_++; + carry >>= kBigitSize; + } +} + + +void Bignum::MultiplyByUInt64(uint64_t factor) { + if (factor == 1) return; + if (factor == 0) { + Zero(); + return; + } + ASSERT(kBigitSize < 32); + uint64_t carry = 0; + uint64_t low = factor & 0xFFFFFFFF; + uint64_t high = factor >> 32; + for (int i = 0; i < used_digits_; ++i) { + uint64_t product_low = low * bigits_[i]; + uint64_t product_high = high * bigits_[i]; + uint64_t tmp = (carry & kBigitMask) + product_low; + bigits_[i] = tmp & kBigitMask; + carry = (carry >> kBigitSize) + (tmp >> kBigitSize) + + (product_high << (32 - kBigitSize)); + } + while (carry != 0) { + EnsureCapacity(used_digits_ + 1); + bigits_[used_digits_] = carry & kBigitMask; + used_digits_++; + carry >>= kBigitSize; + } +} + + +void Bignum::MultiplyByPowerOfTen(int exponent) { + const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d); + const uint16_t kFive1 = 5; + const uint16_t kFive2 = kFive1 * 5; + const uint16_t kFive3 = kFive2 * 5; + const uint16_t kFive4 = kFive3 * 5; + const uint16_t kFive5 = kFive4 * 5; + const uint16_t kFive6 = kFive5 * 5; + const uint32_t kFive7 = kFive6 * 5; + const uint32_t kFive8 = kFive7 * 5; + const uint32_t kFive9 = kFive8 * 5; + const uint32_t kFive10 = kFive9 * 5; + const uint32_t kFive11 = kFive10 * 5; + const uint32_t kFive12 = kFive11 * 5; + const uint32_t kFive13 = kFive12 * 5; + const uint32_t kFive1_to_12[] = + { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6, + kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 }; + + ASSERT(exponent >= 0); + if (exponent == 0) return; + if (used_digits_ == 0) 
return; + + // We shift by exponent at the end just before returning. + int remaining_exponent = exponent; + while (remaining_exponent >= 27) { + MultiplyByUInt64(kFive27); + remaining_exponent -= 27; + } + while (remaining_exponent >= 13) { + MultiplyByUInt32(kFive13); + remaining_exponent -= 13; + } + if (remaining_exponent > 0) { + MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]); + } + ShiftLeft(exponent); +} + + +void Bignum::Square() { + ASSERT(IsClamped()); + int product_length = 2 * used_digits_; + EnsureCapacity(product_length); + + // Comba multiplication: compute each column separately. + // Example: r = a2a1a0 * b2b1b0. + // r = 1 * a0b0 + + // 10 * (a1b0 + a0b1) + + // 100 * (a2b0 + a1b1 + a0b2) + + // 1000 * (a2b1 + a1b2) + + // 10000 * a2b2 + // + // In the worst case we have to accumulate nb-digits products of digit*digit. + // + // Assert that the additional number of bits in a DoubleChunk are enough to + // sum up used_digits of Bigit*Bigit. + if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) { + UNIMPLEMENTED(); + } + DoubleChunk accumulator = 0; + // First shift the digits so we don't overwrite them. + int copy_offset = used_digits_; + for (int i = 0; i < used_digits_; ++i) { + bigits_[copy_offset + i] = bigits_[i]; + } + // We have two loops to avoid some 'if's in the loop. + for (int i = 0; i < used_digits_; ++i) { + // Process temporary digit i with power i. + // The sum of the two indices must be equal to i. + int bigit_index1 = i; + int bigit_index2 = 0; + // Sum all of the sub-products. 
+ while (bigit_index1 >= 0) { + Chunk chunk1 = bigits_[copy_offset + bigit_index1]; + Chunk chunk2 = bigits_[copy_offset + bigit_index2]; + accumulator += static_cast<DoubleChunk>(chunk1) * chunk2; + bigit_index1--; + bigit_index2++; + } + bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask; + accumulator >>= kBigitSize; + } + for (int i = used_digits_; i < product_length; ++i) { + int bigit_index1 = used_digits_ - 1; + int bigit_index2 = i - bigit_index1; + // Invariant: sum of both indices is again equal to i. + // Inner loop runs 0 times on last iteration, emptying accumulator. + while (bigit_index2 < used_digits_) { + Chunk chunk1 = bigits_[copy_offset + bigit_index1]; + Chunk chunk2 = bigits_[copy_offset + bigit_index2]; + accumulator += static_cast<DoubleChunk>(chunk1) * chunk2; + bigit_index1--; + bigit_index2++; + } + // The overwritten bigits_[i] will never be read in further loop iterations, + // because bigit_index1 and bigit_index2 are always greater + // than i - used_digits_. + bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask; + accumulator >>= kBigitSize; + } + // Since the result was guaranteed to lie inside the number the + // accumulator must be 0 now. + ASSERT(accumulator == 0); + + // Don't forget to update the used_digits and the exponent. + used_digits_ = product_length; + exponent_ *= 2; + Clamp(); +} + + +void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) { + ASSERT(base != 0); + ASSERT(power_exponent >= 0); + if (power_exponent == 0) { + AssignUInt16(1); + return; + } + Zero(); + int shifts = 0; + // We expect base to be in range 2-32, and most often to be 10. + // It does not make much sense to implement different algorithms for counting + // the bits. 
+ while ((base & 1) == 0) { + base >>= 1; + shifts++; + } + int bit_size = 0; + int tmp_base = base; + while (tmp_base != 0) { + tmp_base >>= 1; + bit_size++; + } + int final_size = bit_size * power_exponent; + // 1 extra bigit for the shifting, and one for rounded final_size. + EnsureCapacity(final_size / kBigitSize + 2); + + // Left to Right exponentiation. + int mask = 1; + while (power_exponent >= mask) mask <<= 1; + + // The mask is now pointing to the bit above the most significant 1-bit of + // power_exponent. + // Get rid of first 1-bit; + mask >>= 2; + uint64_t this_value = base; + + bool delayed_multipliciation = false; + const uint64_t max_32bits = 0xFFFFFFFF; + while (mask != 0 && this_value <= max_32bits) { + this_value = this_value * this_value; + // Verify that there is enough space in this_value to perform the + // multiplication. The first bit_size bits must be 0. + if ((power_exponent & mask) != 0) { + uint64_t base_bits_mask = + ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1); + bool high_bits_zero = (this_value & base_bits_mask) == 0; + if (high_bits_zero) { + this_value *= base; + } else { + delayed_multipliciation = true; + } + } + mask >>= 1; + } + AssignUInt64(this_value); + if (delayed_multipliciation) { + MultiplyByUInt32(base); + } + + // Now do the same thing as a bignum. + while (mask != 0) { + Square(); + if ((power_exponent & mask) != 0) { + MultiplyByUInt32(base); + } + mask >>= 1; + } + + // And finally add the saved shifts. + ShiftLeft(shifts * power_exponent); +} + + +// Precondition: this/other < 16bit. +uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { + ASSERT(IsClamped()); + ASSERT(other.IsClamped()); + ASSERT(other.used_digits_ > 0); + + // Easy case: if we have less digits than the divisor than the result is 0. + // Note: this handles the case where this == 0, too. 
+ if (BigitLength() < other.BigitLength()) { + return 0; + } + + Align(other); + + uint16_t result = 0; + + // Start by removing multiples of 'other' until both numbers have the same + // number of digits. + while (BigitLength() > other.BigitLength()) { + // This naive approach is extremely inefficient if `this` divided by other + // is big. This function is implemented for doubleToString where + // the result should be small (less than 10). + ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16)); + ASSERT(bigits_[used_digits_ - 1] < 0x10000); + // Remove the multiples of the first digit. + // Example this = 23 and other equals 9. -> Remove 2 multiples. + result += static_cast<uint16_t>(bigits_[used_digits_ - 1]); + SubtractTimes(other, bigits_[used_digits_ - 1]); + } + + ASSERT(BigitLength() == other.BigitLength()); + + // Both bignums are at the same length now. + // Since other has more than 0 digits we know that the access to + // bigits_[used_digits_ - 1] is safe. + Chunk this_bigit = bigits_[used_digits_ - 1]; + Chunk other_bigit = other.bigits_[other.used_digits_ - 1]; + + if (other.used_digits_ == 1) { + // Shortcut for easy (and common) case. + int quotient = this_bigit / other_bigit; + bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient; + ASSERT(quotient < 0x10000); + result += static_cast<uint16_t>(quotient); + Clamp(); + return result; + } + + int division_estimate = this_bigit / (other_bigit + 1); + ASSERT(division_estimate < 0x10000); + result += static_cast<uint16_t>(division_estimate); + SubtractTimes(other, division_estimate); + + if (other_bigit * (division_estimate + 1) > this_bigit) { + // No need to even try to subtract. Even if other's remaining digits were 0 + // another subtraction would be too much. 
+ return result; + } + + while (LessEqual(other, *this)) { + SubtractBignum(other); + result++; + } + return result; +} + + +template<typename S> +static int SizeInHexChars(S number) { + ASSERT(number > 0); + int result = 0; + while (number != 0) { + number >>= 4; + result++; + } + return result; +} + + +static char HexCharOfValue(int value) { + ASSERT(0 <= value && value <= 16); + if (value < 10) return static_cast<char>(value + '0'); + return static_cast<char>(value - 10 + 'A'); +} + + +bool Bignum::ToHexString(char* buffer, int buffer_size) const { + ASSERT(IsClamped()); + // Each bigit must be printable as separate hex-character. + ASSERT(kBigitSize % 4 == 0); + const int kHexCharsPerBigit = kBigitSize / 4; + + if (used_digits_ == 0) { + if (buffer_size < 2) return false; + buffer[0] = '0'; + buffer[1] = '\0'; + return true; + } + // We add 1 for the terminating '\0' character. + int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit + + SizeInHexChars(bigits_[used_digits_ - 1]) + 1; + if (needed_chars > buffer_size) return false; + int string_index = needed_chars - 1; + buffer[string_index--] = '\0'; + for (int i = 0; i < exponent_; ++i) { + for (int j = 0; j < kHexCharsPerBigit; ++j) { + buffer[string_index--] = '0'; + } + } + for (int i = 0; i < used_digits_ - 1; ++i) { + Chunk current_bigit = bigits_[i]; + for (int j = 0; j < kHexCharsPerBigit; ++j) { + buffer[string_index--] = HexCharOfValue(current_bigit & 0xF); + current_bigit >>= 4; + } + } + // And finally the last bigit. 
+ Chunk most_significant_bigit = bigits_[used_digits_ - 1]; + while (most_significant_bigit != 0) { + buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF); + most_significant_bigit >>= 4; + } + return true; +} + + +Bignum::Chunk Bignum::BigitAt(int index) const { + if (index >= BigitLength()) return 0; + if (index < exponent_) return 0; + return bigits_[index - exponent_]; +} + + +int Bignum::Compare(const Bignum& a, const Bignum& b) { + ASSERT(a.IsClamped()); + ASSERT(b.IsClamped()); + int bigit_length_a = a.BigitLength(); + int bigit_length_b = b.BigitLength(); + if (bigit_length_a < bigit_length_b) return -1; + if (bigit_length_a > bigit_length_b) return +1; + for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) { + Chunk bigit_a = a.BigitAt(i); + Chunk bigit_b = b.BigitAt(i); + if (bigit_a < bigit_b) return -1; + if (bigit_a > bigit_b) return +1; + // Otherwise they are equal up to this digit. Try the next digit. + } + return 0; +} + + +int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) { + ASSERT(a.IsClamped()); + ASSERT(b.IsClamped()); + ASSERT(c.IsClamped()); + if (a.BigitLength() < b.BigitLength()) { + return PlusCompare(b, a, c); + } + if (a.BigitLength() + 1 < c.BigitLength()) return -1; + if (a.BigitLength() > c.BigitLength()) return +1; + // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than + // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one + // of 'a'. + if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) { + return -1; + } + + Chunk borrow = 0; + // Starting at min_exponent all digits are == 0. So no need to compare them. 
+ int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_); + for (int i = c.BigitLength() - 1; i >= min_exponent; --i) { + Chunk chunk_a = a.BigitAt(i); + Chunk chunk_b = b.BigitAt(i); + Chunk chunk_c = c.BigitAt(i); + Chunk sum = chunk_a + chunk_b; + if (sum > chunk_c + borrow) { + return +1; + } else { + borrow = chunk_c + borrow - sum; + if (borrow > 1) return -1; + borrow <<= kBigitSize; + } + } + if (borrow == 0) return 0; + return -1; +} + + +void Bignum::Clamp() { + while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) { + used_digits_--; + } + if (used_digits_ == 0) { + // Zero. + exponent_ = 0; + } +} + + +bool Bignum::IsClamped() const { + return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0; +} + + +void Bignum::Zero() { + for (int i = 0; i < used_digits_; ++i) { + bigits_[i] = 0; + } + used_digits_ = 0; + exponent_ = 0; +} + + +void Bignum::Align(const Bignum& other) { + if (exponent_ > other.exponent_) { + // If "X" represents a "hidden" digit (by the exponent) then we are in the + // following case (a == this, b == other): + // a: aaaaaaXXXX or a: aaaaaXXX + // b: bbbbbbX b: bbbbbbbbXX + // We replace some of the hidden digits (X) of a with 0 digits. 
+ // a: aaaaaa000X or a: aaaaa0XX + int zero_digits = exponent_ - other.exponent_; + EnsureCapacity(used_digits_ + zero_digits); + for (int i = used_digits_ - 1; i >= 0; --i) { + bigits_[i + zero_digits] = bigits_[i]; + } + for (int i = 0; i < zero_digits; ++i) { + bigits_[i] = 0; + } + used_digits_ += zero_digits; + exponent_ -= zero_digits; + ASSERT(used_digits_ >= 0); + ASSERT(exponent_ >= 0); + } +} + + +void Bignum::BigitsShiftLeft(int shift_amount) { + ASSERT(shift_amount < kBigitSize); + ASSERT(shift_amount >= 0); + Chunk carry = 0; + for (int i = 0; i < used_digits_; ++i) { + Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount); + bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask; + carry = new_carry; + } + if (carry != 0) { + bigits_[used_digits_] = carry; + used_digits_++; + } +} + + +void Bignum::SubtractTimes(const Bignum& other, int factor) { + ASSERT(exponent_ <= other.exponent_); + if (factor < 3) { + for (int i = 0; i < factor; ++i) { + SubtractBignum(other); + } + return; + } + Chunk borrow = 0; + int exponent_diff = other.exponent_ - exponent_; + for (int i = 0; i < other.used_digits_; ++i) { + DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i]; + DoubleChunk remove = borrow + product; + Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask); + bigits_[i + exponent_diff] = difference & kBigitMask; + borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) + + (remove >> kBigitSize)); + } + for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) { + if (borrow == 0) return; + Chunk difference = bigits_[i] - borrow; + bigits_[i] = difference & kBigitMask; + borrow = difference >> (kChunkSize - 1); + } + Clamp(); +} + + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-bignum.h 
b/deps/icu-small/source/i18n/double-conversion-bignum.h new file mode 100644 index 0000000000..d1af3bf5e7 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-bignum.h @@ -0,0 +1,162 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

#ifndef DOUBLE_CONVERSION_BIGNUM_H_
#define DOUBLE_CONVERSION_BIGNUM_H_

// ICU PATCH: Customize header file paths for ICU.

#include "double-conversion-utils.h"

// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

namespace double_conversion {

// Fixed-capacity big integer. The value is kept as a sequence of 28-bit
// "bigits" (see kBigitSize) together with an exponent counted in whole bigits,
// so the represented number is value(bigits_) * 2^(exponent_ * kBigitSize).
// Storage is a member array of kBigitCapacity chunks; it never grows.
class Bignum {
 public:
  // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
  // This bignum can encode much bigger numbers, since it contains an
  // exponent.
  static const int kMaxSignificantBits = 3584;

  Bignum();
  void AssignUInt16(uint16_t value);
  void AssignUInt64(uint64_t value);
  void AssignBignum(const Bignum& other);

  void AssignDecimalString(Vector<const char> value);
  void AssignHexString(Vector<const char> value);

  void AssignPowerUInt16(uint16_t base, int exponent);

  void AddUInt64(uint64_t operand);
  void AddBignum(const Bignum& other);
  // Precondition: this >= other.
  void SubtractBignum(const Bignum& other);

  void Square();
  void ShiftLeft(int shift_amount);
  void MultiplyByUInt32(uint32_t factor);
  void MultiplyByUInt64(uint64_t factor);
  void MultiplyByPowerOfTen(int exponent);
  // Convenience wrapper: forwards to MultiplyByUInt32(10).
  void Times10() { return MultiplyByUInt32(10); }
  // Pseudocode:
  //  int result = this / other;
  //  this = this % other;
  // In the worst case this function is in O(this/other).
  uint16_t DivideModuloIntBignum(const Bignum& other);

  bool ToHexString(char* buffer, int buffer_size) const;

  // Returns
  //  -1 if a < b,
  //   0 if a == b, and
  //  +1 if a > b.
  static int Compare(const Bignum& a, const Bignum& b);
  // The three predicates below are thin wrappers around Compare().
  static bool Equal(const Bignum& a, const Bignum& b) {
    return Compare(a, b) == 0;
  }
  static bool LessEqual(const Bignum& a, const Bignum& b) {
    return Compare(a, b) <= 0;
  }
  static bool Less(const Bignum& a, const Bignum& b) {
    return Compare(a, b) < 0;
  }
  // Returns Compare(a + b, c);
  static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c);
  // Returns a + b == c
  static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
    return PlusCompare(a, b, c) == 0;
  }
  // Returns a + b <= c
  static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
    return PlusCompare(a, b, c) <= 0;
  }
  // Returns a + b < c
  static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) {
    return PlusCompare(a, b, c) < 0;
  }
 private:
  typedef uint32_t Chunk;
  typedef uint64_t DoubleChunk;

  static const int kChunkSize = sizeof(Chunk) * 8;
  static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
  // With bigit size of 28 we lose some bits, but a double still fits easily
  // into two chunks, and more importantly we can use the Comba multiplication.
  static const int kBigitSize = 28;
  static const Chunk kBigitMask = (1 << kBigitSize) - 1;
  // Every instance embeds kBigitCapacity chunks (bigits_buffer_) directly in
  // the object. Bignums cannot grow; the only capacity check is
  // EnsureCapacity() below.
  // kBigitCapacity == 3584 / 28 == 128.
  static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;

  // Nothing is ever reallocated: a request for more than kBigitCapacity
  // bigits ends in UNREACHABLE().
  void EnsureCapacity(int size) {
    if (size > kBigitCapacity) {
      UNREACHABLE();
    }
  }
  void Align(const Bignum& other);
  void Clamp();
  bool IsClamped() const;
  void Zero();
  // Requires this to have enough capacity (no tests done).
  // Updates used_digits_ if necessary.
  // shift_amount must be < kBigitSize.
  void BigitsShiftLeft(int shift_amount);
  // BigitLength includes the "hidden" digits encoded in the exponent.
  int BigitLength() const { return used_digits_ + exponent_; }
  Chunk BigitAt(int index) const;
  void SubtractTimes(const Bignum& other, int factor);

  Chunk bigits_buffer_[kBigitCapacity];
  // A vector backed by bigits_buffer_. This way accesses to the array are
  // checked for out-of-bounds errors.
  Vector<Chunk> bigits_;
  int used_digits_;
  // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
  int exponent_;

  DISALLOW_COPY_AND_ASSIGN(Bignum);
};

}  // namespace double_conversion

// ICU PATCH: Close ICU namespace
U_NAMESPACE_END

#endif  // DOUBLE_CONVERSION_BIGNUM_H_
#endif  // ICU PATCH: close #if !UCONFIG_NO_FORMATTING
diff --git a/deps/icu-small/source/i18n/double-conversion-cached-powers.cpp b/deps/icu-small/source/i18n/double-conversion-cached-powers.cpp
new file mode 100644
index 0000000000..e49700444c
--- /dev/null
+++ b/deps/icu-small/source/i18n/double-conversion-cached-powers.cpp
@@ -0,0 +1,193 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the double-conversion library. Original license:
//
// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#include <stdarg.h> +#include <limits.h> +#include <math.h> + +// ICU PATCH: Customize header file paths for ICU. 
+ +#include "double-conversion-utils.h" + +#include "double-conversion-cached-powers.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +struct CachedPower { + uint64_t significand; + int16_t binary_exponent; + int16_t decimal_exponent; +}; + +static const CachedPower kCachedPowers[] = { + {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348}, + {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340}, + {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332}, + {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324}, + {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316}, + {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308}, + {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300}, + {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292}, + {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284}, + {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276}, + {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268}, + {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260}, + {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252}, + {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244}, + {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236}, + {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228}, + {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220}, + {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212}, + {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204}, + {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196}, + {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188}, + {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180}, + {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172}, + {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164}, + {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156}, + {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148}, + {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140}, + {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132}, + {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124}, + {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116}, + {UINT64_2PART_C(0x964e858c, 
91ba2655), -422, -108}, + {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100}, + {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92}, + {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84}, + {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76}, + {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68}, + {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60}, + {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52}, + {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44}, + {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36}, + {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28}, + {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20}, + {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12}, + {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4}, + {UINT64_2PART_C(0x9c400000, 00000000), -50, 4}, + {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12}, + {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20}, + {UINT64_2PART_C(0x813f3978, f8940984), 30, 28}, + {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36}, + {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44}, + {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52}, + {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60}, + {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68}, + {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76}, + {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84}, + {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92}, + {UINT64_2PART_C(0x924d692c, a61be758), 269, 100}, + {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108}, + {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116}, + {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124}, + {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132}, + {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140}, + {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148}, + {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156}, + {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164}, + {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172}, + {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180}, + {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188}, + {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196}, + 
{UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204}, + {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212}, + {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220}, + {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228}, + {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236}, + {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244}, + {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252}, + {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260}, + {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268}, + {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276}, + {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284}, + {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292}, + {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300}, + {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308}, + {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316}, + {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324}, + {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332}, + {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340}, +}; + +static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent. +static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10) +// Difference between the decimal exponents in the table above. +const int PowersOfTenCache::kDecimalExponentDistance = 8; +const int PowersOfTenCache::kMinDecimalExponent = -348; +const int PowersOfTenCache::kMaxDecimalExponent = 340; + +void PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + int min_exponent, + int max_exponent, + DiyFp* power, + int* decimal_exponent) { + int kQ = DiyFp::kSignificandSize; + double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10); + int foo = kCachedPowersOffset; + int index = + (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1; + ASSERT(0 <= index && index < static_cast<int>(ARRAY_SIZE(kCachedPowers))); + CachedPower cached_power = kCachedPowers[index]; + ASSERT(min_exponent <= cached_power.binary_exponent); + (void) max_exponent; // Mark variable as used. 
+ ASSERT(cached_power.binary_exponent <= max_exponent); + *decimal_exponent = cached_power.decimal_exponent; + *power = DiyFp(cached_power.significand, cached_power.binary_exponent); +} + + +void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent, + DiyFp* power, + int* found_exponent) { + ASSERT(kMinDecimalExponent <= requested_exponent); + ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance); + int index = + (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance; + CachedPower cached_power = kCachedPowers[index]; + *power = DiyFp(cached_power.significand, cached_power.binary_exponent); + *found_exponent = cached_power.decimal_exponent; + ASSERT(*found_exponent <= requested_exponent); + ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance); +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-cached-powers.h b/deps/icu-small/source/i18n/double-conversion-cached-powers.h new file mode 100644 index 0000000000..438746b143 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-cached-powers.h @@ -0,0 +1,82 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_
#define DOUBLE_CONVERSION_CACHED_POWERS_H_

// ICU PATCH: Customize header file paths for ICU.

#include "double-conversion-diy-fp.h"

// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

namespace double_conversion {

// Static-only access to a table of cached powers of ten, each handed out as
// a DiyFp together with its decimal exponent.
class PowersOfTenCache {
 public:

  // Not all powers of ten are cached. The decimal exponent of two neighboring
  // cached numbers will differ by kDecimalExponentDistance.
  static const int kDecimalExponentDistance;

  // Smallest and largest decimal exponent for which a power is cached.
  static const int kMinDecimalExponent;
  static const int kMaxDecimalExponent;

  // Returns a cached power-of-ten with a binary exponent in the range
  // [min_exponent; max_exponent] (boundaries included).
  // The power is written to *power and its decimal exponent to
  // *decimal_exponent.
  static void GetCachedPowerForBinaryExponentRange(int min_exponent,
                                                   int max_exponent,
                                                   DiyFp* power,
                                                   int* decimal_exponent);

  // Returns a cached power of ten x ~= 10^k such that
  //   k <= requested_exponent < k + kDecimalExponentDistance.
  // x is written to *power and k to *found_exponent.
  // The given requested_exponent must satisfy
  //   kMinDecimalExponent <= requested_exponent, and
  //   requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance.
  static void GetCachedPowerForDecimalExponent(int requested_exponent,
                                               DiyFp* power,
                                               int* found_exponent);
};

}  // namespace double_conversion

// ICU PATCH: Close ICU namespace
U_NAMESPACE_END

#endif  // DOUBLE_CONVERSION_CACHED_POWERS_H_
#endif  // ICU PATCH: close #if !UCONFIG_NO_FORMATTING
diff --git a/deps/icu-small/source/i18n/double-conversion-diy-fp.cpp b/deps/icu-small/source/i18n/double-conversion-diy-fp.cpp
new file mode 100644
index 0000000000..f38430c6c3
--- /dev/null
+++ b/deps/icu-small/source/i18n/double-conversion-diy-fp.cpp
@@ -0,0 +1,74 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the double-conversion library. Original license:
//
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-diy-fp.h" +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +void DiyFp::Multiply(const DiyFp& other) { + // Simply "emulates" a 128 bit multiplication. + // However: the resulting number only contains 64 bits. The least + // significant 64 bits are only used for rounding the most significant 64 + // bits. 
+ const uint64_t kM32 = 0xFFFFFFFFU; + uint64_t a = f_ >> 32; + uint64_t b = f_ & kM32; + uint64_t c = other.f_ >> 32; + uint64_t d = other.f_ & kM32; + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32); + // By adding 1U << 31 to tmp we round the final result. + // Halfway cases will be round up. + tmp += 1U << 31; + uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); + e_ += other.e_ + 64; + f_ = result_f; +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-diy-fp.h b/deps/icu-small/source/i18n/double-conversion-diy-fp.h new file mode 100644 index 0000000000..21896851d2 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-diy-fp.h @@ -0,0 +1,136 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_DIY_FP_H_ +#define DOUBLE_CONVERSION_DIY_FP_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +// This "Do It Yourself Floating Point" class implements a floating-point number +// with a uint64 significand and an int exponent. Normalized DiyFp numbers will +// have the most significant bit of the significand set. +// Multiplication and Subtraction do not normalize their results. +// DiyFp are not designed to contain special doubles (NaN and Infinity). +class DiyFp { + public: + static const int kSignificandSize = 64; + + DiyFp() : f_(0), e_(0) {} + DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {} + + // this = this - other. + // The exponents of both numbers must be the same and the significand of this + // must be bigger than the significand of other. + // The result will not be normalized. 
+ void Subtract(const DiyFp& other) { + ASSERT(e_ == other.e_); + ASSERT(f_ >= other.f_); + f_ -= other.f_; + } + + // Returns a - b. + // The exponents of both numbers must be the same and this must be bigger + // than other. The result will not be normalized. + static DiyFp Minus(const DiyFp& a, const DiyFp& b) { + DiyFp result = a; + result.Subtract(b); + return result; + } + + + // this = this * other. + void Multiply(const DiyFp& other); + + // returns a * b; + static DiyFp Times(const DiyFp& a, const DiyFp& b) { + DiyFp result = a; + result.Multiply(b); + return result; + } + + void Normalize() { + ASSERT(f_ != 0); + uint64_t significand = f_; + int exponent = e_; + + // This method is mainly called for normalizing boundaries. In general + // boundaries need to be shifted by 10 bits. We thus optimize for this case. + const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); + while ((significand & k10MSBits) == 0) { + significand <<= 10; + exponent -= 10; + } + while ((significand & kUint64MSB) == 0) { + significand <<= 1; + exponent--; + } + f_ = significand; + e_ = exponent; + } + + static DiyFp Normalize(const DiyFp& a) { + DiyFp result = a; + result.Normalize(); + return result; + } + + uint64_t f() const { return f_; } + int e() const { return e_; } + + void set_f(uint64_t new_value) { f_ = new_value; } + void set_e(int new_value) { e_ = new_value; } + + private: + static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000); + + uint64_t f_; + int e_; +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_DIY_FP_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-fast-dtoa.cpp b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.cpp new file mode 100644 index 0000000000..8d1499a79b --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.cpp @@ -0,0 +1,683 @@ +// © 2018 and later: 
Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +// ICU PATCH: Customize header file paths for ICU. 
+ +#include "double-conversion-fast-dtoa.h" + +#include "double-conversion-cached-powers.h" +#include "double-conversion-diy-fp.h" +#include "double-conversion-ieee.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +// The minimal and maximal target exponent define the range of w's binary +// exponent, where 'w' is the result of multiplying the input by a cached power +// of ten. +// +// A different range might be chosen on a different platform, to optimize digit +// generation, but a smaller range requires more powers of ten to be cached. +static const int kMinimalTargetExponent = -60; +static const int kMaximalTargetExponent = -32; + + +// Adjusts the last digit of the generated number, and screens out generated +// solutions that may be inaccurate. A solution may be inaccurate if it is +// outside the safe interval, or if we cannot prove that it is closer to the +// input than a neighboring representation of the same length. +// +// Input: * buffer containing the digits of too_high / 10^kappa +// * the buffer's length +// * distance_too_high_w == (too_high - w).f() * unit +// * unsafe_interval == (too_high - too_low).f() * unit +// * rest = (too_high - buffer * 10^kappa).f() * unit +// * ten_kappa = 10^kappa * unit +// * unit = the common multiplier +// Output: returns true if the buffer is guaranteed to contain the closest +// representable number to the input. +// Modifies the generated digits in the buffer to approach (round towards) w. +static bool RoundWeed(Vector<char> buffer, + int length, + uint64_t distance_too_high_w, + uint64_t unsafe_interval, + uint64_t rest, + uint64_t ten_kappa, + uint64_t unit) { + uint64_t small_distance = distance_too_high_w - unit; + uint64_t big_distance = distance_too_high_w + unit; + // Let w_low = too_high - big_distance, and + // w_high = too_high - small_distance. 
+ // Note: w_low < w < w_high + // + // The real w (* unit) must lie somewhere inside the interval + // ]w_low; w_high[ (often written as "(w_low; w_high)") + + // Basically the buffer currently contains a number in the unsafe interval + // ]too_low; too_high[ with too_low < w < too_high + // + // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // ^v 1 unit ^ ^ ^ ^ + // boundary_high --------------------- . . . . + // ^v 1 unit . . . . + // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . . + // . . ^ . . + // . big_distance . . . + // . . . . rest + // small_distance . . . . + // v . . . . + // w_high - - - - - - - - - - - - - - - - - - . . . . + // ^v 1 unit . . . . + // w ---------------------------------------- . . . . + // ^v 1 unit v . . . + // w_low - - - - - - - - - - - - - - - - - - - - - . . . + // . . v + // buffer --------------------------------------------------+-------+-------- + // . . + // safe_interval . + // v . + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . + // ^v 1 unit . + // boundary_low ------------------------- unsafe_interval + // ^v 1 unit v + // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // + // + // Note that the value of buffer could lie anywhere inside the range too_low + // to too_high. + // + // boundary_low, boundary_high and w are approximations of the real boundaries + // and v (the input number). They are guaranteed to be precise up to one unit. + // In fact the error is guaranteed to be strictly less than one unit. + // + // Anything that lies outside the unsafe interval is guaranteed not to round + // to v when read again. + // Anything that lies inside the safe interval is guaranteed to round to v + // when read again. + // If the number inside the buffer lies inside the unsafe interval but not + // inside the safe interval then we simply do not know and bail out (returning + // false). 
+ // + // Similarly we have to take into account the imprecision of 'w' when finding + // the closest representation of 'w'. If we have two potential + // representations, and one is closer to both w_low and w_high, then we know + // it is closer to the actual value v. + // + // By generating the digits of too_high we got the largest (closest to + // too_high) buffer that is still in the unsafe interval. In the case where + // w_high < buffer < too_high we try to decrement the buffer. + // This way the buffer approaches (rounds towards) w. + // There are 3 conditions that stop the decrementation process: + // 1) the buffer is already below w_high + // 2) decrementing the buffer would make it leave the unsafe interval + // 3) decrementing the buffer would yield a number below w_high and farther + // away than the current number. In other words: + // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high + // Instead of using the buffer directly we use its distance to too_high. + // Conceptually rest ~= too_high - buffer + // We need to do the following tests in this order to avoid over- and + // underflows. + ASSERT(rest <= unsafe_interval); + while (rest < small_distance && // Negated condition 1 + unsafe_interval - rest >= ten_kappa && // Negated condition 2 + (rest + ten_kappa < small_distance || // buffer{-1} > w_high + small_distance - rest >= rest + ten_kappa - small_distance)) { + buffer[length - 1]--; + rest += ten_kappa; + } + + // We have approached w+ as much as possible. We now test if approaching w- + // would require changing the buffer. If yes, then we have two possible + // representations close to w, but we cannot decide which one is closer. + if (rest < big_distance && + unsafe_interval - rest >= ten_kappa && + (rest + ten_kappa < big_distance || + big_distance - rest > rest + ten_kappa - big_distance)) { + return false; + } + + // Weeding test. 
+ // The safe interval is [too_low + 2 ulp; too_high - 2 ulp] + // Since too_low = too_high - unsafe_interval this is equivalent to + // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp] + // Conceptually we have: rest ~= too_high - buffer + return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit); +} + + +// Rounds the buffer upwards if the result is closer to v by possibly adding +// 1 to the buffer. If the precision of the calculation is not sufficient to +// round correctly, return false. +// The rounding might shift the whole buffer in which case the kappa is +// adjusted. For example "99", kappa = 3 might become "10", kappa = 4. +// +// If 2*rest > ten_kappa then the buffer needs to be round up. +// rest can have an error of +/- 1 unit. This function accounts for the +// imprecision and returns false, if the rounding direction cannot be +// unambiguously determined. +// +// Precondition: rest < ten_kappa. +static bool RoundWeedCounted(Vector<char> buffer, + int length, + uint64_t rest, + uint64_t ten_kappa, + uint64_t unit, + int* kappa) { + ASSERT(rest < ten_kappa); + // The following tests are done in a specific order to avoid overflows. They + // will work correctly with any uint64 values of rest < ten_kappa and unit. + // + // If the unit is too big, then we don't know which way to round. For example + // a unit of 50 means that the real number lies within rest +/- 50. If + // 10^kappa == 40 then there is no way to tell which way to round. + if (unit >= ten_kappa) return false; + // Even if unit is just half the size of 10^kappa we are already completely + // lost. (And after the previous test we know that the expression will not + // over/underflow.) + if (ten_kappa - unit <= unit) return false; + // If 2 * (rest + unit) <= 10^kappa we can safely round down. + if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) { + return true; + } + // If 2 * (rest - unit) >= 10^kappa, then we can safely round up. 
+ if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) { + // Increment the last digit recursively until we find a non '9' digit. + buffer[length - 1]++; + for (int i = length - 1; i > 0; --i) { + if (buffer[i] != '0' + 10) break; + buffer[i] = '0'; + buffer[i - 1]++; + } + // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the + // exception of the first digit all digits are now '0'. Simply switch the + // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and + // the power (the kappa) is increased. + if (buffer[0] == '0' + 10) { + buffer[0] = '1'; + (*kappa) += 1; + } + return true; + } + return false; +} + +// Returns the biggest power of ten that is less than or equal to the given +// number. We furthermore receive the maximum number of bits 'number' has. +// +// Returns power == 10^(exponent_plus_one-1) such that +// power <= number < power * 10. +// If number_bits == 0 then 0^(0-1) is returned. +// The number of bits must be <= 32. +// Precondition: number < (1 << (number_bits + 1)). + +// Inspired by the method for finding an integer log base 10 from here: +// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 +static unsigned int const kSmallPowersOfTen[] = + {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, + 1000000000}; + +static void BiggestPowerTen(uint32_t number, + int number_bits, + uint32_t* power, + int* exponent_plus_one) { + ASSERT(number < (1u << (number_bits + 1))); + // 1233/4096 is approximately 1/lg(10). + int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12); + // We increment to skip over the first entry in the kPowersOf10 table. + // Note: kPowersOf10[i] == 10^(i-1). + exponent_plus_one_guess++; + // We don't have any guarantees that 2^number_bits <= number. 
+ if (number < kSmallPowersOfTen[exponent_plus_one_guess]) { + exponent_plus_one_guess--; + } + *power = kSmallPowersOfTen[exponent_plus_one_guess]; + *exponent_plus_one = exponent_plus_one_guess; +} + +// Generates the digits of input number w. +// w is a floating-point number (DiyFp), consisting of a significand and an +// exponent. Its exponent is bounded by kMinimalTargetExponent and +// kMaximalTargetExponent. +// Hence -60 <= w.e() <= -32. +// +// Returns false if it fails, in which case the generated digits in the buffer +// should not be used. +// Preconditions: +// * low, w and high are correct up to 1 ulp (unit in the last place). That +// is, their error must be less than a unit of their last digits. +// * low.e() == w.e() == high.e() +// * low < w < high, and taking into account their error: low~ <= high~ +// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent +// Postconditions: returns false if procedure fails. +// otherwise: +// * buffer is not null-terminated, but len contains the number of digits. +// * buffer contains the shortest possible decimal digit-sequence +// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the +// correct values of low and high (without their error). +// * if more than one decimal representation gives the minimal number of +// decimal digits then the one closest to W (where W is the correct value +// of w) is chosen. +// Remark: this procedure takes into account the imprecision of its input +// numbers. If the precision is not enough to guarantee all the postconditions +// then false is returned. This usually happens rarely (~0.5%). +// +// Say, for the sake of example, that +// w.e() == -48, and w.f() == 0x1234567890abcdef +// w's value can be computed by w.f() * 2^w.e() +// We can obtain w's integral digits by simply shifting w.f() by -w.e(). +// -> w's integral part is 0x1234 +// w's fractional part is therefore 0x567890abcdef. 
// Printing w's integral part is easy (simply print 0x1234 in decimal).
// In order to print its fraction we repeatedly multiply the fraction by 10 and
// get each digit. Example the first digit after the point would be computed by
//   (0x567890abcdef * 10) >> 48. -> 3
// The whole thing becomes slightly more complicated because we want to stop
// once we have enough digits. That is, once the digits inside the buffer
// represent 'w' we can stop. Everything inside the interval low - high
// represents w. However we have to pay attention to low, high and w's
// imprecision.
//
// Parameters:
//  * low, w, high: the scaled value and its two boundaries (same exponent).
//  * buffer: receives the generated decimal digits as characters.
//  * length: out-parameter, reset to 0 and then incremented per digit written.
//  * kappa: out-parameter, the decimal exponent belonging to the digits.
// The return value is whatever RoundWeed reports for the generated digits.
static bool DigitGen(DiyFp low,
                     DiyFp w,
                     DiyFp high,
                     Vector<char> buffer,
                     int* length,
                     int* kappa) {
  ASSERT(low.e() == w.e() && w.e() == high.e());
  ASSERT(low.f() + 1 <= high.f() - 1);
  ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
  // low, w and high are imprecise, but by less than one ulp (unit in the last
  // place).
  // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that
  // the new numbers are outside of the interval we want the final
  // representation to lie in.
  // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield
  // numbers that are certain to lie in the interval. We will use this fact
  // later on.
  // We will now start by generating the digits within the uncertain
  // interval. Later we will weed out representations that lie outside the safe
  // interval and thus _might_ lie outside the correct interval.
  uint64_t unit = 1;
  DiyFp too_low = DiyFp(low.f() - unit, low.e());
  DiyFp too_high = DiyFp(high.f() + unit, high.e());
  // too_low and too_high are guaranteed to lie outside the interval we want the
  // generated number in.
  DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low);
  // We now cut the input number into two parts: the integral digits and the
  // fractionals. We will not write any decimal separator though, but adapt
  // kappa instead.
  // Reminder: we are currently computing the digits (stored inside the buffer)
  // such that:   too_low < buffer * 10^kappa < too_high
  // We use too_high for the digit_generation and stop as soon as possible.
  // If we stop early we effectively round down.
  DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
  // Division by one is a shift.
  uint32_t integrals = static_cast<uint32_t>(too_high.f() >> -one.e());
  // Modulo by one is an and.
  uint64_t fractionals = too_high.f() & (one.f() - 1);
  uint32_t divisor;
  int divisor_exponent_plus_one;
  BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
                  &divisor, &divisor_exponent_plus_one);
  *kappa = divisor_exponent_plus_one;
  *length = 0;
  // Loop invariant: buffer = too_high / 10^kappa  (integer division)
  // The invariant holds for the first iteration: kappa has been initialized
  // with the divisor exponent + 1. And the divisor is the biggest power of ten
  // that is less than or equal to integrals.
  while (*kappa > 0) {
    int digit = integrals / divisor;
    ASSERT(digit <= 9);
    buffer[*length] = static_cast<char>('0' + digit);
    (*length)++;
    integrals %= divisor;
    (*kappa)--;
    // Note that kappa now equals the exponent of the divisor and that the
    // invariant thus holds again.
    uint64_t rest =
        (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
    // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e())
    // Reminder: unsafe_interval.e() == one.e()
    if (rest < unsafe_interval.f()) {
      // Rounding down (by not emitting the remaining digits) yields a number
      // that lies within the unsafe interval.
      return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(),
                       unsafe_interval.f(), rest,
                       static_cast<uint64_t>(divisor) << -one.e(), unit);
    }
    divisor /= 10;
  }

  // The integrals have been generated. We are at the point of the decimal
  // separator. In the following loop we simply multiply the remaining digits by
  // 10 and divide by one. We just need to pay attention to multiply associated
  // data (like the interval or 'unit'), too.
  // Note that the multiplication by 10 does not overflow, because w.e >= -60
  // and thus one.e >= -60.
  ASSERT(one.e() >= -60);
  ASSERT(fractionals < one.f());
  ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
  // This loop has no explicit bound: it only exits through the return below,
  // once the remaining fraction is smaller than the (equally scaled) unsafe
  // interval.
  for (;;) {
    fractionals *= 10;
    unit *= 10;
    unsafe_interval.set_f(unsafe_interval.f() * 10);
    // Integer division by one.
    int digit = static_cast<int>(fractionals >> -one.e());
    ASSERT(digit <= 9);
    buffer[*length] = static_cast<char>('0' + digit);
    (*length)++;
    fractionals &= one.f() - 1;  // Modulo by one.
    (*kappa)--;
    if (fractionals < unsafe_interval.f()) {
      // The distance too_high - w is scaled by the same accumulated factor
      // ('unit') as fractionals and unsafe_interval.
      return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit,
                       unsafe_interval.f(), fractionals, one.f(), unit);
    }
  }
}



// Generates (at most) requested_digits digits of input number w.
// w is a floating-point number (DiyFp), consisting of a significand and an
// exponent. Its exponent is bounded by kMinimalTargetExponent and
// kMaximalTargetExponent.
//       Hence -60 <= w.e() <= -32.
//
// Returns false if it fails, in which case the generated digits in the buffer
// should not be used.
// Preconditions:
//  * w is correct up to 1 ulp (unit in the last place). That
//    is, its error must be strictly less than a unit of its last digit.
//  * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
//
// Postconditions: returns false if procedure fails.
//   otherwise:
//     * buffer is not null-terminated, but length contains the number of
//       digits.
//     * the representation in buffer is the most precise representation of
//       requested_digits digits.
//     * buffer contains at most requested_digits digits of w. If there are less
//       than requested_digits digits then some trailing '0's have been removed.
//     * kappa is such that
//            w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2.
+// +// Remark: This procedure takes into account the imprecision of its input +// numbers. If the precision is not enough to guarantee all the postconditions +// then false is returned. This usually happens rarely, but the failure-rate +// increases with higher requested_digits. +static bool DigitGenCounted(DiyFp w, + int requested_digits, + Vector<char> buffer, + int* length, + int* kappa) { + ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); + ASSERT(kMinimalTargetExponent >= -60); + ASSERT(kMaximalTargetExponent <= -32); + // w is assumed to have an error less than 1 unit. Whenever w is scaled we + // also scale its error. + uint64_t w_error = 1; + // We cut the input number into two parts: the integral digits and the + // fractional digits. We don't emit any decimal separator, but adapt kappa + // instead. Example: instead of writing "1.2" we put "12" into the buffer and + // increase kappa by 1. + DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e()); + // Division by one is a shift. + uint32_t integrals = static_cast<uint32_t>(w.f() >> -one.e()); + // Modulo by one is an and. + uint64_t fractionals = w.f() & (one.f() - 1); + uint32_t divisor; + int divisor_exponent_plus_one; + BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), + &divisor, &divisor_exponent_plus_one); + *kappa = divisor_exponent_plus_one; + *length = 0; + + // Loop invariant: buffer = w / 10^kappa (integer division) + // The invariant holds for the first iteration: kappa has been initialized + // with the divisor exponent + 1. And the divisor is the biggest power of ten + // that is smaller than 'integrals'. + while (*kappa > 0) { + int digit = integrals / divisor; + ASSERT(digit <= 9); + buffer[*length] = static_cast<char>('0' + digit); + (*length)++; + requested_digits--; + integrals %= divisor; + (*kappa)--; + // Note that kappa now equals the exponent of the divisor and that the + // invariant thus holds again. 
+ if (requested_digits == 0) break; + divisor /= 10; + } + + if (requested_digits == 0) { + uint64_t rest = + (static_cast<uint64_t>(integrals) << -one.e()) + fractionals; + return RoundWeedCounted(buffer, *length, rest, + static_cast<uint64_t>(divisor) << -one.e(), w_error, + kappa); + } + + // The integrals have been generated. We are at the point of the decimal + // separator. In the following loop we simply multiply the remaining digits by + // 10 and divide by one. We just need to pay attention to multiply associated + // data (the 'unit'), too. + // Note that the multiplication by 10 does not overflow, because w.e >= -60 + // and thus one.e >= -60. + ASSERT(one.e() >= -60); + ASSERT(fractionals < one.f()); + ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); + while (requested_digits > 0 && fractionals > w_error) { + fractionals *= 10; + w_error *= 10; + // Integer division by one. + int digit = static_cast<int>(fractionals >> -one.e()); + ASSERT(digit <= 9); + buffer[*length] = static_cast<char>('0' + digit); + (*length)++; + requested_digits--; + fractionals &= one.f() - 1; // Modulo by one. + (*kappa)--; + } + if (requested_digits != 0) return false; + return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error, + kappa); +} + + +// Provides a decimal representation of v. +// Returns true if it succeeds, otherwise the result cannot be trusted. +// There will be *length digits inside the buffer (not null-terminated). +// If the function returns true then +// v == (double) (buffer * 10^decimal_exponent). +// The digits in the buffer are the shortest representation possible: no +// 0.09999999999999999 instead of 0.1. The shorter representation will even be +// chosen even if the longer one would be closer to v. +// The last digit will be closest to the actual v. That is, even if several +// digits might correctly yield 'v' when read again, the closest will be +// computed. 
+static bool Grisu3(double v, + FastDtoaMode mode, + Vector<char> buffer, + int* length, + int* decimal_exponent) { + DiyFp w = Double(v).AsNormalizedDiyFp(); + // boundary_minus and boundary_plus are the boundaries between v and its + // closest floating-point neighbors. Any number strictly between + // boundary_minus and boundary_plus will round to v when convert to a double. + // Grisu3 will never output representations that lie exactly on a boundary. + DiyFp boundary_minus, boundary_plus; + if (mode == FAST_DTOA_SHORTEST) { + Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus); + } else { + ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE); + float single_v = static_cast<float>(v); + Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus); + } + ASSERT(boundary_plus.e() == w.e()); + DiyFp ten_mk; // Cached power of ten: 10^-k + int mk; // -k + int ten_mk_minimal_binary_exponent = + kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); + int ten_mk_maximal_binary_exponent = + kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); + PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + ten_mk_minimal_binary_exponent, + ten_mk_maximal_binary_exponent, + &ten_mk, &mk); + ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + + DiyFp::kSignificandSize) && + (kMaximalTargetExponent >= w.e() + ten_mk.e() + + DiyFp::kSignificandSize)); + // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a + // 64 bit significand and ten_mk is thus only precise up to 64 bits. + + // The DiyFp::Times procedure rounds its result, and ten_mk is approximated + // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now + // off by a small amount. + // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. 
+ // In other words: let f = scaled_w.f() and e = scaled_w.e(), then + // (f-1) * 2^e < w*10^k < (f+1) * 2^e + DiyFp scaled_w = DiyFp::Times(w, ten_mk); + ASSERT(scaled_w.e() == + boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize); + // In theory it would be possible to avoid some recomputations by computing + // the difference between w and boundary_minus/plus (a power of 2) and to + // compute scaled_boundary_minus/plus by subtracting/adding from + // scaled_w. However the code becomes much less readable and the speed + // enhancements are not terriffic. + DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk); + DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk); + + // DigitGen will generate the digits of scaled_w. Therefore we have + // v == (double) (scaled_w * 10^-mk). + // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an + // integer than it will be updated. For instance if scaled_w == 1.23 then + // the buffer will be filled with "123" und the decimal_exponent will be + // decreased by 2. + int kappa; + bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus, + buffer, length, &kappa); + *decimal_exponent = -mk + kappa; + return result; +} + + +// The "counted" version of grisu3 (see above) only generates requested_digits +// number of digits. This version does not generate the shortest representation, +// and with enough requested digits 0.1 will at some point print as 0.9999999... +// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and +// therefore the rounding strategy for halfway cases is irrelevant. 
+static bool Grisu3Counted(double v, + int requested_digits, + Vector<char> buffer, + int* length, + int* decimal_exponent) { + DiyFp w = Double(v).AsNormalizedDiyFp(); + DiyFp ten_mk; // Cached power of ten: 10^-k + int mk; // -k + int ten_mk_minimal_binary_exponent = + kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); + int ten_mk_maximal_binary_exponent = + kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); + PowersOfTenCache::GetCachedPowerForBinaryExponentRange( + ten_mk_minimal_binary_exponent, + ten_mk_maximal_binary_exponent, + &ten_mk, &mk); + ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + + DiyFp::kSignificandSize) && + (kMaximalTargetExponent >= w.e() + ten_mk.e() + + DiyFp::kSignificandSize)); + // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a + // 64 bit significand and ten_mk is thus only precise up to 64 bits. + + // The DiyFp::Times procedure rounds its result, and ten_mk is approximated + // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now + // off by a small amount. + // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. + // In other words: let f = scaled_w.f() and e = scaled_w.e(), then + // (f-1) * 2^e < w*10^k < (f+1) * 2^e + DiyFp scaled_w = DiyFp::Times(w, ten_mk); + + // We now have (double) (scaled_w * 10^-mk). + // DigitGen will generate the first requested_digits digits of scaled_w and + // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It + // will not always be exactly the same since DigitGenCounted only produces a + // limited number of digits.) 
+ int kappa; + bool result = DigitGenCounted(scaled_w, requested_digits, + buffer, length, &kappa); + *decimal_exponent = -mk + kappa; + return result; +} + + +bool FastDtoa(double v, + FastDtoaMode mode, + int requested_digits, + Vector<char> buffer, + int* length, + int* decimal_point) { + ASSERT(v > 0); + ASSERT(!Double(v).IsSpecial()); + + bool result = false; + int decimal_exponent = 0; + switch (mode) { + case FAST_DTOA_SHORTEST: + case FAST_DTOA_SHORTEST_SINGLE: + result = Grisu3(v, mode, buffer, length, &decimal_exponent); + break; + case FAST_DTOA_PRECISION: + result = Grisu3Counted(v, requested_digits, + buffer, length, &decimal_exponent); + break; + default: + UNREACHABLE(); + } + if (result) { + *decimal_point = *length + decimal_exponent; + buffer[*length] = '\0'; + } + return result; +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-fast-dtoa.h b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.h new file mode 100644 index 0000000000..58a6470052 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-fast-dtoa.h @@ -0,0 +1,106 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. 
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_
#define DOUBLE_CONVERSION_FAST_DTOA_H_

// ICU PATCH: Customize header file paths for ICU.

#include "double-conversion-utils.h"

// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

namespace double_conversion {

// Selects which digit-generation variant FastDtoa runs.
enum FastDtoaMode {
  // Computes the shortest representation of the given input. The returned
  // result will be the most accurate number of this length. Longer
  // representations might be more accurate.
  FAST_DTOA_SHORTEST,
  // Same as FAST_DTOA_SHORTEST but for single-precision floats: the rounding
  // boundaries are those of the input narrowed to a float.
  FAST_DTOA_SHORTEST_SINGLE,
  // Computes a representation where the precision (number of digits) is
  // given as input. The precision is independent of the decimal point.
  FAST_DTOA_PRECISION
};

// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not
// include the terminating '\0' character.
static const int kFastDtoaMaximalLength = 17;
// Same for single-precision numbers.
static const int kFastDtoaMaximalSingleLength = 9;

// Provides a decimal representation of v.
// The result should be interpreted as buffer * 10^(point - length), where
// 'point' is the value stored through the decimal_point out-parameter.
//
// Precondition:
//   * v must be a strictly positive finite double.
//
// Returns true if it succeeds, otherwise the result can not be trusted.
// There will be *length digits inside the buffer followed by a null terminator.
// If the function returns true and mode equals
//   - FAST_DTOA_SHORTEST, then
//     the parameter requested_digits is ignored.
//     The result satisfies
//         v == (double) (buffer * 10^(point - length)).
//     The digits in the buffer are the shortest representation possible. E.g.
//     if 0.099999999999 and 0.1 represent the same double then "1" is returned
//     with point = 0.
//     The last digit will be closest to the actual v. That is, even if several
//     digits might correctly yield 'v' when read again, the buffer will contain
//     the one closest to v.
//   - FAST_DTOA_SHORTEST_SINGLE, then
//     the parameter requested_digits is ignored as well; the behavior is that
//     of FAST_DTOA_SHORTEST applied to v as a single-precision float.
//   - FAST_DTOA_PRECISION, then
//     the buffer contains requested_digits digits.
//     the difference v - (buffer * 10^(point-length)) is closest to zero for
//     all possible representations of requested_digits digits.
//     If there are two values that are equally close, then FastDtoa returns
//     false.
// For all modes the buffer must be large enough to hold the result, including
// the terminating '\0' written after the digits.
+bool FastDtoa(double d, + FastDtoaMode mode, + int requested_digits, + Vector<char> buffer, + int* length, + int* decimal_point); + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_FAST_DTOA_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-ieee.h b/deps/icu-small/source/i18n/double-conversion-ieee.h new file mode 100644 index 0000000000..952bcea27f --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-ieee.h @@ -0,0 +1,420 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

#ifndef DOUBLE_CONVERSION_DOUBLE_H_
#define DOUBLE_CONVERSION_DOUBLE_H_

// ICU PATCH: Customize header file paths for ICU.

#include "double-conversion-diy-fp.h"

// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

namespace double_conversion {

// Bit-pattern conversions between floating-point values and same-sized
// unsigned integers.
// We assume that doubles and uint64_t have the same endianness.
static uint64_t double_to_uint64(double d) { return BitCast<uint64_t>(d); }
static double uint64_to_double(uint64_t d64) { return BitCast<double>(d64); }
static uint32_t float_to_uint32(float f) { return BitCast<uint32_t>(f); }
static float uint32_to_float(uint32_t d32) { return BitCast<float>(d32); }

// Helper functions for doubles.
// Wraps the 64-bit pattern of a double and exposes its sign, exponent and
// significand fields.
class Double {
 public:
  static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);
  static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);
  static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
  static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);
  static const int kPhysicalSignificandSize = 52;  // Excludes the hidden bit.
  static const int kSignificandSize = 53;

  // The default-constructed Double has an all-zero bit pattern.
  Double() : d64_(0) {}
  explicit Double(double d) : d64_(double_to_uint64(d)) {}
  explicit Double(uint64_t d64) : d64_(d64) {}
  explicit Double(DiyFp diy_fp)
    : d64_(DiyFpToUint64(diy_fp)) {}

  // Returns the (significand, exponent) pair of this double, not normalized.
  // The value encoded by this Double must be greater or equal to +0.0.
  // It must not be special (infinity, or NaN).
  DiyFp AsDiyFp() const {
    ASSERT(Sign() > 0);
    ASSERT(!IsSpecial());
    return DiyFp(Significand(), Exponent());
  }

  // Returns this double as a DiyFp whose significand has been shifted left so
  // that the hidden bit ends up at the top of the DiyFp significand.
  // The value encoded by this Double must be strictly greater than 0.
  DiyFp AsNormalizedDiyFp() const {
    ASSERT(value() > 0.0);
    uint64_t f = Significand();
    int e = Exponent();

    // The current double could be a denormal.
    // (This loop relies on f != 0, which the precondition above guarantees.)
    while ((f & kHiddenBit) == 0) {
      f <<= 1;
      e--;
    }
    // Do the final shifts in one go.
    f <<= DiyFp::kSignificandSize - kSignificandSize;
    e -= DiyFp::kSignificandSize - kSignificandSize;
    return DiyFp(f, e);
  }

  // Returns the double's bits as uint64.
  uint64_t AsUint64() const {
    return d64_;
  }

  // Returns the next greater double. Returns +infinity on input +infinity.
  double NextDouble() const {
    if (d64_ == kInfinity) return Double(kInfinity).value();
    if (Sign() < 0 && Significand() == 0) {
      // -0.0
      return 0.0;
    }
    // For negative values a smaller bit pattern is the greater value.
    if (Sign() < 0) {
      return Double(d64_ - 1).value();
    } else {
      return Double(d64_ + 1).value();
    }
  }

  // Returns the next smaller double. Returns -infinity on input -infinity.
  double PreviousDouble() const {
    if (d64_ == (kInfinity | kSignMask)) return -Infinity();
    if (Sign() < 0) {
      return Double(d64_ + 1).value();
    } else {
      // +0.0
      if (Significand() == 0) return -0.0;
      return Double(d64_ - 1).value();
    }
  }

  // Returns the unbiased binary exponent. Denormals report
  // kDenormalExponent.
  int Exponent() const {
    if (IsDenormal()) return kDenormalExponent;

    uint64_t d64 = AsUint64();
    int biased_e =
        static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
    return biased_e - kExponentBias;
  }

  // Returns the significand, with the hidden bit added unless the value is a
  // denormal.
  uint64_t Significand() const {
    uint64_t d64 = AsUint64();
    uint64_t significand = d64 & kSignificandMask;
    if (!IsDenormal()) {
      return significand + kHiddenBit;
    } else {
      return significand;
    }
  }

  // Returns true if the double is a denormal.
  // (The test is "all exponent bits are zero", so it is also true for 0.0.)
  bool IsDenormal() const {
    uint64_t d64 = AsUint64();
    return (d64 & kExponentMask) == 0;
  }

  // We consider denormals not to be special.
  // Hence only Infinity and NaN are special.
  bool IsSpecial() const {
    uint64_t d64 = AsUint64();
    return (d64 & kExponentMask) == kExponentMask;
  }

  // True if all exponent bits are set and the significand field is non-zero.
  bool IsNan() const {
    uint64_t d64 = AsUint64();
    return ((d64 & kExponentMask) == kExponentMask) &&
        ((d64 & kSignificandMask) != 0);
  }

  // True if all exponent bits are set and the significand field is zero.
  bool IsInfinite() const {
    uint64_t d64 = AsUint64();
    return ((d64 & kExponentMask) == kExponentMask) &&
        ((d64 & kSignificandMask) == 0);
  }

  // Returns 1 if the sign bit is clear and -1 if it is set.
  int Sign() const {
    uint64_t d64 = AsUint64();
    return (d64 & kSignMask) == 0? 1: -1;
  }

  // Returns the upper boundary of this double as (2 * f + 1, e - 1).
  // Precondition: the value encoded by this Double must be greater or equal
  // than +0.0.
  DiyFp UpperBoundary() const {
    ASSERT(Sign() > 0);
    return DiyFp(Significand() * 2 + 1, Exponent() - 1);
  }

  // Computes the two boundaries of this.
  // The bigger boundary (m_plus) is normalized. The lower boundary has the same
  // exponent as m_plus.
  // Precondition: the value encoded by this Double must be greater than 0.
  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
    ASSERT(value() > 0.0);
    DiyFp v = this->AsDiyFp();
    DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
    DiyFp m_minus;
    if (LowerBoundaryIsCloser()) {
      m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
    } else {
      m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
    }
    // Bring m_minus to the exponent of m_plus.
    m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
    m_minus.set_e(m_plus.e());
    *out_m_plus = m_plus;
    *out_m_minus = m_minus;
  }

  bool LowerBoundaryIsCloser() const {
    // The lower boundary is closer if the physical significand is zero, i.e.
    // the significand consists of the hidden bit only.
    // Think of v = 1000e10 and v- = 9999e9.
    // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
    // at a distance of 1e8.
    // The only exception is for the smallest normal: the largest denormal is
    // at the same distance as its successor.
    // Note: denormals have the same exponent as the smallest normals.
    bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
    return physical_significand_is_zero && (Exponent() != kDenormalExponent);
  }

  // Returns the wrapped bit pattern reinterpreted as a double.
  double value() const { return uint64_to_double(d64_); }

  // Returns the significand size for a given order of magnitude.
  // If v = f*2^e with 2^(p-1) <= f <= 2^p then p+e is v's order of magnitude.
  // This function returns the number of significant binary digits v will have
  // once it's encoded into a double. In almost all cases this is equal to
  // kSignificandSize. The only exceptions are denormals. They start with
  // leading zeroes and their effective significand-size is hence smaller.
  static int SignificandSizeForOrderOfMagnitude(int order) {
    if (order >= (kDenormalExponent + kSignificandSize)) {
      return kSignificandSize;
    }
    if (order <= kDenormalExponent) return 0;
    return order - kDenormalExponent;
  }

  static double Infinity() {
    return Double(kInfinity).value();
  }

  static double NaN() {
    return Double(kNaN).value();
  }

 private:
  static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
  static const int kDenormalExponent = -kExponentBias + 1;
  static const int kMaxExponent = 0x7FF - kExponentBias;
  static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
  static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);

  const uint64_t d64_;

  // Encodes diy_fp as a double bit pattern (sign bit clear).
  // Returns kInfinity if the exponent is too large and 0 if it is too small.
  static uint64_t DiyFpToUint64(DiyFp diy_fp) {
    uint64_t significand = diy_fp.f();
    int exponent = diy_fp.e();
    // Shrink the significand until it fits into 53 bits; the bits shifted out
    // are dropped.
    while (significand > kHiddenBit + kSignificandMask) {
      significand >>= 1;
      exponent++;
    }
    if (exponent >= kMaxExponent) {
      return kInfinity;
    }
    if (exponent < kDenormalExponent) {
      return 0;
    }
    // Normalize: shift left until the hidden bit is set or the smallest
    // exponent is reached.
    while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
      significand <<= 1;
      exponent--;
    }
    uint64_t biased_exponent;
    if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
      // Denormal: stored with an all-zero exponent field.
      biased_exponent = 0;
    } else {
      biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
    }
    return (significand & kSignificandMask) |
        (biased_exponent << kPhysicalSignificandSize);
  }

  DISALLOW_COPY_AND_ASSIGN(Double);
};

class Single {
 public:
  static const uint32_t kSignMask = 0x80000000;
  static const uint32_t kExponentMask = 0x7F800000;
  static const uint32_t kSignificandMask = 0x007FFFFF;
  static const uint32_t kHiddenBit = 0x00800000;
  static const int kPhysicalSignificandSize = 23;  // Excludes the hidden bit.
+ static const int kSignificandSize = 24; + + Single() : d32_(0) {} + explicit Single(float f) : d32_(float_to_uint32(f)) {} + explicit Single(uint32_t d32) : d32_(d32) {} + + // The value encoded by this Single must be greater or equal to +0.0. + // It must not be special (infinity, or NaN). + DiyFp AsDiyFp() const { + ASSERT(Sign() > 0); + ASSERT(!IsSpecial()); + return DiyFp(Significand(), Exponent()); + } + + // Returns the single's bit as uint64. + uint32_t AsUint32() const { + return d32_; + } + + int Exponent() const { + if (IsDenormal()) return kDenormalExponent; + + uint32_t d32 = AsUint32(); + int biased_e = + static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize); + return biased_e - kExponentBias; + } + + uint32_t Significand() const { + uint32_t d32 = AsUint32(); + uint32_t significand = d32 & kSignificandMask; + if (!IsDenormal()) { + return significand + kHiddenBit; + } else { + return significand; + } + } + + // Returns true if the single is a denormal. + bool IsDenormal() const { + uint32_t d32 = AsUint32(); + return (d32 & kExponentMask) == 0; + } + + // We consider denormals not to be special. + // Hence only Infinity and NaN are special. + bool IsSpecial() const { + uint32_t d32 = AsUint32(); + return (d32 & kExponentMask) == kExponentMask; + } + + bool IsNan() const { + uint32_t d32 = AsUint32(); + return ((d32 & kExponentMask) == kExponentMask) && + ((d32 & kSignificandMask) != 0); + } + + bool IsInfinite() const { + uint32_t d32 = AsUint32(); + return ((d32 & kExponentMask) == kExponentMask) && + ((d32 & kSignificandMask) == 0); + } + + int Sign() const { + uint32_t d32 = AsUint32(); + return (d32 & kSignMask) == 0? 1: -1; + } + + // Computes the two boundaries of this. + // The bigger boundary (m_plus) is normalized. The lower boundary has the same + // exponent as m_plus. + // Precondition: the value encoded by this Single must be greater than 0. 
+ void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const { + ASSERT(value() > 0.0); + DiyFp v = this->AsDiyFp(); + DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); + DiyFp m_minus; + if (LowerBoundaryIsCloser()) { + m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); + } else { + m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); + } + m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); + m_minus.set_e(m_plus.e()); + *out_m_plus = m_plus; + *out_m_minus = m_minus; + } + + // Precondition: the value encoded by this Single must be greater or equal + // than +0.0. + DiyFp UpperBoundary() const { + ASSERT(Sign() > 0); + return DiyFp(Significand() * 2 + 1, Exponent() - 1); + } + + bool LowerBoundaryIsCloser() const { + // The boundary is closer if the significand is of the form f == 2^p-1 then + // the lower boundary is closer. + // Think of v = 1000e10 and v- = 9999e9. + // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but + // at a distance of 1e8. + // The only exception is for the smallest normal: the largest denormal is + // at the same distance as its successor. + // Note: denormals have the same exponent as the smallest normals. 
+ bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0); + return physical_significand_is_zero && (Exponent() != kDenormalExponent); + } + + float value() const { return uint32_to_float(d32_); } + + static float Infinity() { + return Single(kInfinity).value(); + } + + static float NaN() { + return Single(kNaN).value(); + } + + private: + static const int kExponentBias = 0x7F + kPhysicalSignificandSize; + static const int kDenormalExponent = -kExponentBias + 1; + static const int kMaxExponent = 0xFF - kExponentBias; + static const uint32_t kInfinity = 0x7F800000; + static const uint32_t kNaN = 0x7FC00000; + + const uint32_t d32_; + + DISALLOW_COPY_AND_ASSIGN(Single); +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_DOUBLE_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion-utils.h b/deps/icu-small/source/i18n/double-conversion-utils.h new file mode 100644 index 0000000000..02795b4bc5 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion-utils.h @@ -0,0 +1,358 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_UTILS_H_ +#define DOUBLE_CONVERSION_UTILS_H_ + +#include <stdlib.h> +#include <string.h> + +// ICU PATCH: Use U_ASSERT instead of <assert.h> +#include "uassert.h" +#define ASSERT U_ASSERT + +#ifndef UNIMPLEMENTED +#define UNIMPLEMENTED() (abort()) +#endif +#ifndef DOUBLE_CONVERSION_NO_RETURN +#ifdef _MSC_VER +#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn) +#else +#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn)) +#endif +#endif +#ifndef UNREACHABLE +#ifdef _MSC_VER +void DOUBLE_CONVERSION_NO_RETURN abort_noreturn(); +inline void abort_noreturn() { abort(); } +#define UNREACHABLE() (abort_noreturn()) +#else +#define UNREACHABLE() (abort()) +#endif +#endif + + +// Double operations detection based on target architecture. +// Linux uses a 80bit wide floating point stack on x86. This induces double +// rounding, which in turn leads to wrong results. 
// An easy way to test if the floating-point operations are correct is to
// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
// the result is equal to 89255e-22.
// The best way to test this is to create a division function and to compare
// the output of the division with the expected result. (Inlining must be
// disabled.)
// On Linux, x86: 89255e-22 != Div_double(89255.0/1e22)
// ICU PATCH: Enable ARM builds for Windows with 'defined(_M_ARM)'.
#if defined(_M_X64) || defined(__x86_64__) || \
    defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || \
    defined(__hppa__) || defined(__ia64__) || \
    defined(__mips__) || \
    defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
    defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
    defined(__SH4__) || defined(__alpha__) || \
    defined(_MIPS_ARCH_MIPS32R2) || \
    defined(__AARCH64EL__) || defined(__aarch64__) || \
    defined(__riscv)
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#elif defined(__mc68000__)
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#if defined(_WIN32)
// Windows uses a 64bit wide floating point stack.
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#else
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
#endif  // _WIN32
#else
#error Target architecture was not detected as supported by Double-Conversion.
#endif

// Attribute that silences "unused" diagnostics where the compiler offers one.
#if defined(__GNUC__)
#define DOUBLE_CONVERSION_UNUSED __attribute__((unused))
#else
#define DOUBLE_CONVERSION_UNUSED
#endif

#if defined(_WIN32) && !defined(__MINGW32__)

typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;  // NOLINT
typedef unsigned short uint16_t;  // NOLINT
typedef int int32_t;
typedef unsigned int uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
// intptr_t and friends are defined in crtdefs.h through stdio.h.

#else

#include <stdint.h>

#endif

typedef uint16_t uc16;

// The following macro works on both 32 and 64-bit platforms.
// Usage: instead of writing 0x1234567890123456
//      write UINT64_2PART_C(0x12345678,90123456);
#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))


// The expression ARRAY_SIZE(a) is a compile-time constant of type
// size_t which represents the number of elements of the given
// array. You should only use ARRAY_SIZE on statically allocated
// arrays.
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a)                                   \
  ((sizeof(a) / sizeof(*(a))) /                         \
  static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
#endif

// A macro to disallow the evil copy constructor and operator= functions.
// This should be used in the private: declarations for a class.
#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
  TypeName(const TypeName&);                    \
  void operator=(const TypeName&)
#endif

// A macro to disallow all the implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
//
// This should be used in the private: declarations for a class
// that wants to prevent anyone from instantiating it. This is
// especially useful for classes containing only static methods.
+#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName(); \ + DISALLOW_COPY_AND_ASSIGN(TypeName) +#endif + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +static const int kCharSize = sizeof(char); + +// Returns the maximum of the two parameters. +template <typename T> +static T Max(T a, T b) { + return a < b ? b : a; +} + + +// Returns the minimum of the two parameters. +template <typename T> +static T Min(T a, T b) { + return a < b ? a : b; +} + + +inline int StrLength(const char* string) { + size_t length = strlen(string); + ASSERT(length == static_cast<size_t>(static_cast<int>(length))); + return static_cast<int>(length); +} + +// This is a simplified version of V8's Vector class. +template <typename T> +class Vector { + public: + Vector() : start_(NULL), length_(0) {} + Vector(T* data, int len) : start_(data), length_(len) { + ASSERT(len == 0 || (len > 0 && data != NULL)); + } + + // Returns a vector using the same backing storage as this one, + // spanning from and including 'from', to but not including 'to'. + Vector<T> SubVector(int from, int to) { + ASSERT(to <= length_); + ASSERT(from < to); + ASSERT(0 <= from); + return Vector<T>(start() + from, to - from); + } + + // Returns the length of the vector. + int length() const { return length_; } + + // Returns whether or not the vector is empty. + bool is_empty() const { return length_ == 0; } + + // Returns the pointer to the start of the data in the vector. + T* start() const { return start_; } + + // Access individual vector elements - checks bounds in debug mode. + T& operator[](int index) const { + ASSERT(0 <= index && index < length_); + return start_[index]; + } + + T& first() { return start_[0]; } + + T& last() { return start_[length_ - 1]; } + + private: + T* start_; + int length_; +}; + + +// Helper class for building result strings in a character buffer. 
The +// purpose of the class is to use safe operations that checks the +// buffer bounds on all operations in debug mode. +class StringBuilder { + public: + StringBuilder(char* buffer, int buffer_size) + : buffer_(buffer, buffer_size), position_(0) { } + + ~StringBuilder() { if (!is_finalized()) Finalize(); } + + int size() const { return buffer_.length(); } + + // Get the current position in the builder. + int position() const { + ASSERT(!is_finalized()); + return position_; + } + + // Reset the position. + void Reset() { position_ = 0; } + + // Add a single character to the builder. It is not allowed to add + // 0-characters; use the Finalize() method to terminate the string + // instead. + void AddCharacter(char c) { + ASSERT(c != '\0'); + ASSERT(!is_finalized() && position_ < buffer_.length()); + buffer_[position_++] = c; + } + + // Add an entire string to the builder. Uses strlen() internally to + // compute the length of the input string. + void AddString(const char* s) { + AddSubstring(s, StrLength(s)); + } + + // Add the first 'n' characters of the given string 's' to the + // builder. The input string must have enough characters. + void AddSubstring(const char* s, int n) { + ASSERT(!is_finalized() && position_ + n < buffer_.length()); + ASSERT(static_cast<size_t>(n) <= strlen(s)); + memmove(&buffer_[position_], s, n * kCharSize); + position_ += n; + } + + + // Add character padding to the builder. If count is non-positive, + // nothing is added to the builder. + void AddPadding(char c, int count) { + for (int i = 0; i < count; i++) { + AddCharacter(c); + } + } + + // Finalize the string by 0-terminating it and returning the buffer. + char* Finalize() { + ASSERT(!is_finalized() && position_ < buffer_.length()); + buffer_[position_] = '\0'; + // Make sure nobody managed to add a 0-character to the + // buffer while building the string. 
+ ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_)); + position_ = -1; + ASSERT(is_finalized()); + return buffer_.start(); + } + + private: + Vector<char> buffer_; + int position_; + + bool is_finalized() const { return position_ < 0; } + + DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder); +}; + +// The type-based aliasing rule allows the compiler to assume that pointers of +// different types (for some definition of different) never alias each other. +// Thus the following code does not work: +// +// float f = foo(); +// int fbits = *(int*)(&f); +// +// The compiler 'knows' that the int pointer can't refer to f since the types +// don't match, so the compiler may cache f in a register, leaving random data +// in fbits. Using C++ style casts makes no difference, however a pointer to +// char data is assumed to alias any other pointer. This is the 'memcpy +// exception'. +// +// Bit_cast uses the memcpy exception to move the bits from a variable of one +// type of a variable of another type. Of course the end result is likely to +// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005) +// will completely optimize BitCast away. +// +// There is an additional use for BitCast. +// Recent gccs will warn when they see casts that may result in breakage due to +// the type-based aliasing rule. If you have checked that there is no breakage +// you can use BitCast to cast one pointer type to another. This confuses gcc +// enough that it can no longer see that you have cast one pointer type to +// another thus avoiding the warning. +template <class Dest, class Source> +inline Dest BitCast(const Source& source) { + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. + DOUBLE_CONVERSION_UNUSED + typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 
1 : -1]; + + Dest dest; + memmove(&dest, &source, sizeof(dest)); + return dest; +} + +template <class Dest, class Source> +inline Dest BitCast(Source* source) { + return BitCast<Dest>(reinterpret_cast<uintptr_t>(source)); +} + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_UTILS_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion.cpp b/deps/icu-small/source/i18n/double-conversion.cpp new file mode 100644 index 0000000000..8629284aa0 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion.cpp @@ -0,0 +1,1005 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#include <limits.h> +#include <math.h> + +// ICU PATCH: Customize header file paths for ICU. +// The files fixed-dtoa.h and strtod.h are not needed. + +#include "double-conversion.h" + +#include "double-conversion-bignum-dtoa.h" +#include "double-conversion-fast-dtoa.h" +#include "double-conversion-ieee.h" +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +#if 0 // not needed for ICU +const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() { + int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN; + static DoubleToStringConverter converter(flags, + "Infinity", + "NaN", + 'e', + -6, 21, + 6, 0); + return converter; +} + + +bool DoubleToStringConverter::HandleSpecialValues( + double value, + StringBuilder* result_builder) const { + Double double_inspect(value); + if (double_inspect.IsInfinite()) { + if (infinity_symbol_ == NULL) return false; + if (value < 0) { + result_builder->AddCharacter('-'); + } + result_builder->AddString(infinity_symbol_); + return true; + } + if (double_inspect.IsNan()) { + if (nan_symbol_ == NULL) return false; + result_builder->AddString(nan_symbol_); + return true; + } + return false; +} + + +void DoubleToStringConverter::CreateExponentialRepresentation( + const char* decimal_digits, + int 
length, + int exponent, + StringBuilder* result_builder) const { + ASSERT(length != 0); + result_builder->AddCharacter(decimal_digits[0]); + if (length != 1) { + result_builder->AddCharacter('.'); + result_builder->AddSubstring(&decimal_digits[1], length-1); + } + result_builder->AddCharacter(exponent_character_); + if (exponent < 0) { + result_builder->AddCharacter('-'); + exponent = -exponent; + } else { + if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) { + result_builder->AddCharacter('+'); + } + } + if (exponent == 0) { + result_builder->AddCharacter('0'); + return; + } + ASSERT(exponent < 1e4); + const int kMaxExponentLength = 5; + char buffer[kMaxExponentLength + 1]; + buffer[kMaxExponentLength] = '\0'; + int first_char_pos = kMaxExponentLength; + while (exponent > 0) { + buffer[--first_char_pos] = '0' + (exponent % 10); + exponent /= 10; + } + result_builder->AddSubstring(&buffer[first_char_pos], + kMaxExponentLength - first_char_pos); +} + + +void DoubleToStringConverter::CreateDecimalRepresentation( + const char* decimal_digits, + int length, + int decimal_point, + int digits_after_point, + StringBuilder* result_builder) const { + // Create a representation that is padded with zeros if needed. + if (decimal_point <= 0) { + // "0.00000decimal_rep" or "0.000decimal_rep00". + result_builder->AddCharacter('0'); + if (digits_after_point > 0) { + result_builder->AddCharacter('.'); + result_builder->AddPadding('0', -decimal_point); + ASSERT(length <= digits_after_point - (-decimal_point)); + result_builder->AddSubstring(decimal_digits, length); + int remaining_digits = digits_after_point - (-decimal_point) - length; + result_builder->AddPadding('0', remaining_digits); + } + } else if (decimal_point >= length) { + // "decimal_rep0000.00000" or "decimal_rep.0000". 
+ result_builder->AddSubstring(decimal_digits, length); + result_builder->AddPadding('0', decimal_point - length); + if (digits_after_point > 0) { + result_builder->AddCharacter('.'); + result_builder->AddPadding('0', digits_after_point); + } + } else { + // "decima.l_rep000". + ASSERT(digits_after_point > 0); + result_builder->AddSubstring(decimal_digits, decimal_point); + result_builder->AddCharacter('.'); + ASSERT(length - decimal_point <= digits_after_point); + result_builder->AddSubstring(&decimal_digits[decimal_point], + length - decimal_point); + int remaining_digits = digits_after_point - (length - decimal_point); + result_builder->AddPadding('0', remaining_digits); + } + if (digits_after_point == 0) { + if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) { + result_builder->AddCharacter('.'); + } + if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) { + result_builder->AddCharacter('0'); + } + } +} + + +bool DoubleToStringConverter::ToShortestIeeeNumber( + double value, + StringBuilder* result_builder, + DoubleToStringConverter::DtoaMode mode) const { + ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE); + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + int decimal_point; + bool sign; + const int kDecimalRepCapacity = kBase10MaximalLength + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + + bool unique_zero = (flags_ & UNIQUE_ZERO) != 0; + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + int exponent = decimal_point - 1; + if ((decimal_in_shortest_low_ <= exponent) && + (exponent < decimal_in_shortest_high_)) { + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, + decimal_point, + Max(0, decimal_rep_length - decimal_point), + result_builder); + } else { + CreateExponentialRepresentation(decimal_rep, decimal_rep_length, 
exponent, + result_builder); + } + return true; +} + + +bool DoubleToStringConverter::ToFixed(double value, + int requested_digits, + StringBuilder* result_builder) const { + ASSERT(kMaxFixedDigitsBeforePoint == 60); + const double kFirstNonFixed = 1e60; + + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (requested_digits > kMaxFixedDigitsAfterPoint) return false; + if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false; + + // Find a sufficiently precise decimal representation of n. + int decimal_point; + bool sign; + // Add space for the '\0' byte. + const int kDecimalRepCapacity = + kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + DoubleToAscii(value, FIXED, requested_digits, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, + requested_digits, result_builder); + return true; +} + + +bool DoubleToStringConverter::ToExponential( + double value, + int requested_digits, + StringBuilder* result_builder) const { + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (requested_digits < -1) return false; + if (requested_digits > kMaxExponentialDigits) return false; + + int decimal_point; + bool sign; + // Add space for digit before the decimal point and the '\0' character. 
+ const int kDecimalRepCapacity = kMaxExponentialDigits + 2; + ASSERT(kDecimalRepCapacity > kBase10MaximalLength); + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + if (requested_digits == -1) { + DoubleToAscii(value, SHORTEST, 0, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + } else { + DoubleToAscii(value, PRECISION, requested_digits + 1, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + ASSERT(decimal_rep_length <= requested_digits + 1); + + for (int i = decimal_rep_length; i < requested_digits + 1; ++i) { + decimal_rep[i] = '0'; + } + decimal_rep_length = requested_digits + 1; + } + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + int exponent = decimal_point - 1; + CreateExponentialRepresentation(decimal_rep, + decimal_rep_length, + exponent, + result_builder); + return true; +} + + +bool DoubleToStringConverter::ToPrecision(double value, + int precision, + StringBuilder* result_builder) const { + if (Double(value).IsSpecial()) { + return HandleSpecialValues(value, result_builder); + } + + if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) { + return false; + } + + // Find a sufficiently precise decimal representation of n. + int decimal_point; + bool sign; + // Add one for the terminating null character. + const int kDecimalRepCapacity = kMaxPrecisionDigits + 1; + char decimal_rep[kDecimalRepCapacity]; + int decimal_rep_length; + + DoubleToAscii(value, PRECISION, precision, + decimal_rep, kDecimalRepCapacity, + &sign, &decimal_rep_length, &decimal_point); + ASSERT(decimal_rep_length <= precision); + + bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); + if (sign && (value != 0.0 || !unique_zero)) { + result_builder->AddCharacter('-'); + } + + // The exponent if we print the number as x.xxeyyy. That is with the + // decimal point after the first digit. 
+ int exponent = decimal_point - 1; + + int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0; + if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || + (decimal_point - precision + extra_zero > + max_trailing_padding_zeroes_in_precision_mode_)) { + // Fill buffer to contain 'precision' digits. + // Usually the buffer is already at the correct length, but 'DoubleToAscii' + // is allowed to return less characters. + for (int i = decimal_rep_length; i < precision; ++i) { + decimal_rep[i] = '0'; + } + + CreateExponentialRepresentation(decimal_rep, + precision, + exponent, + result_builder); + } else { + CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, + Max(0, precision - decimal_point), + result_builder); + } + return true; +} +#endif // not needed for ICU + + +static BignumDtoaMode DtoaToBignumDtoaMode( + DoubleToStringConverter::DtoaMode dtoa_mode) { + switch (dtoa_mode) { + case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST; + case DoubleToStringConverter::SHORTEST_SINGLE: + return BIGNUM_DTOA_SHORTEST_SINGLE; + case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED; + case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION; + default: + UNREACHABLE(); + } +} + + +void DoubleToStringConverter::DoubleToAscii(double v, + DtoaMode mode, + int requested_digits, + char* buffer, + int buffer_length, + bool* sign, + int* length, + int* point) { + Vector<char> vector(buffer, buffer_length); + ASSERT(!Double(v).IsSpecial()); + ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0); + + if (Double(v).Sign() < 0) { + *sign = true; + v = -v; + } else { + *sign = false; + } + + if (mode == PRECISION && requested_digits == 0) { + vector[0] = '\0'; + *length = 0; + return; + } + + if (v == 0) { + vector[0] = '0'; + vector[1] = '\0'; + *length = 1; + *point = 1; + return; + } + + bool fast_worked; + switch (mode) { + case SHORTEST: + fast_worked = 
FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point); + break; +#if 0 // not needed for ICU + case SHORTEST_SINGLE: + fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0, + vector, length, point); + break; + case FIXED: + fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point); + break; + case PRECISION: + fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits, + vector, length, point); + break; +#endif // not needed for ICU + default: + fast_worked = false; + UNREACHABLE(); + } + if (fast_worked) return; + + // If the fast dtoa didn't succeed use the slower bignum version. + BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode); + BignumDtoa(v, bignum_mode, requested_digits, vector, length, point); + vector[*length] = '\0'; +} + + +#if 0 // not needed for ICU +// Consumes the given substring from the iterator. +// Returns false, if the substring does not match. +template <class Iterator> +static bool ConsumeSubString(Iterator* current, + Iterator end, + const char* substring) { + ASSERT(**current == *substring); + for (substring++; *substring != '\0'; substring++) { + ++*current; + if (*current == end || **current != *substring) return false; + } + ++*current; + return true; +} + + +// Maximum number of significant digits in decimal representation. +// The longest possible double in decimal representation is +// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074 +// (768 digits). If we parse a number whose first digits are equal to a +// mean of 2 adjacent doubles (that could have up to 769 digits) the result +// must be rounded to the bigger one unless the tail consists of zeros, so +// we don't need to preserve all the digits. 
+const int kMaxSignificantDigits = 772; + + +static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 }; +static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7); + + +static const uc16 kWhitespaceTable16[] = { + 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195, + 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279 +}; +static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16); + + +static bool isWhitespace(int x) { + if (x < 128) { + for (int i = 0; i < kWhitespaceTable7Length; i++) { + if (kWhitespaceTable7[i] == x) return true; + } + } else { + for (int i = 0; i < kWhitespaceTable16Length; i++) { + if (kWhitespaceTable16[i] == x) return true; + } + } + return false; +} + + +// Returns true if a nonspace found and false if the end has reached. +template <class Iterator> +static inline bool AdvanceToNonspace(Iterator* current, Iterator end) { + while (*current != end) { + if (!isWhitespace(**current)) return true; + ++*current; + } + return false; +} + + +static bool isDigit(int x, int radix) { + return (x >= '0' && x <= '9' && x < '0' + radix) + || (radix > 10 && x >= 'a' && x < 'a' + radix - 10) + || (radix > 10 && x >= 'A' && x < 'A' + radix - 10); +} + + +static double SignedZero(bool sign) { + return sign ? -0.0 : 0.0; +} + + +// Returns true if 'c' is a decimal digit that is valid for the given radix. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the last +// condition was always true. By moving it into a separate function the +// compiler wouldn't warn anymore. 
+#if _MSC_VER +#pragma optimize("",off) +static bool IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#pragma optimize("",on) +#else +static bool inline IsDecimalDigitForRadix(int c, int radix) { + return '0' <= c && c <= '9' && (c - '0') < radix; +} +#endif +// Returns true if 'c' is a character digit that is valid for the given radix. +// The 'a_character' should be 'a' or 'A'. +// +// The function is small and could be inlined, but VS2012 emitted a warning +// because it constant-propagated the radix and concluded that the first +// condition was always false. By moving it into a separate function the +// compiler wouldn't warn anymore. +static bool IsCharacterDigitForRadix(int c, int radix, char a_character) { + return radix > 10 && c >= a_character && c < a_character + radix - 10; +} + + +// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. +template <int radix_log_2, class Iterator> +static double RadixStringToIeee(Iterator* current, + Iterator end, + bool sign, + bool allow_trailing_junk, + double junk_string_value, + bool read_as_double, + bool* result_is_junk) { + ASSERT(*current != end); + + const int kDoubleSize = Double::kSignificandSize; + const int kSingleSize = Single::kSignificandSize; + const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize; + + *result_is_junk = true; + + // Skip leading 0s. 
+ while (**current == '0') { + ++(*current); + if (*current == end) { + *result_is_junk = false; + return SignedZero(sign); + } + } + + int64_t number = 0; + int exponent = 0; + const int radix = (1 << radix_log_2); + + do { + int digit; + if (IsDecimalDigitForRadix(**current, radix)) { + digit = static_cast<char>(**current) - '0'; + } else if (IsCharacterDigitForRadix(**current, radix, 'a')) { + digit = static_cast<char>(**current) - 'a' + 10; + } else if (IsCharacterDigitForRadix(**current, radix, 'A')) { + digit = static_cast<char>(**current) - 'A' + 10; + } else { + if (allow_trailing_junk || !AdvanceToNonspace(current, end)) { + break; + } else { + return junk_string_value; + } + } + + number = number * radix + digit; + int overflow = static_cast<int>(number >> kSignificandSize); + if (overflow != 0) { + // Overflow occurred. Need to determine which direction to round the + // result. + int overflow_bits_count = 1; + while (overflow > 1) { + overflow_bits_count++; + overflow >>= 1; + } + + int dropped_bits_mask = ((1 << overflow_bits_count) - 1); + int dropped_bits = static_cast<int>(number) & dropped_bits_mask; + number >>= overflow_bits_count; + exponent = overflow_bits_count; + + bool zero_tail = true; + for (;;) { + ++(*current); + if (*current == end || !isDigit(**current, radix)) break; + zero_tail = zero_tail && **current == '0'; + exponent += radix_log_2; + } + + if (!allow_trailing_junk && AdvanceToNonspace(current, end)) { + return junk_string_value; + } + + int middle_value = (1 << (overflow_bits_count - 1)); + if (dropped_bits > middle_value) { + number++; // Rounding up. + } else if (dropped_bits == middle_value) { + // Rounding to even to consistency with decimals: half-way case rounds + // up if significant part is odd and down otherwise. + if ((number & 1) != 0 || !zero_tail) { + number++; // Rounding up. + } + } + + // Rounding up may cause overflow. 
+ if ((number & ((int64_t)1 << kSignificandSize)) != 0) { + exponent++; + number >>= 1; + } + break; + } + ++(*current); + } while (*current != end); + + ASSERT(number < ((int64_t)1 << kSignificandSize)); + ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number); + + *result_is_junk = false; + + if (exponent == 0) { + if (sign) { + if (number == 0) return -0.0; + number = -number; + } + return static_cast<double>(number); + } + + ASSERT(number != 0); + return Double(DiyFp(number, exponent)).value(); +} + + +template <class Iterator> +double StringToDoubleConverter::StringToIeee( + Iterator input, + int length, + bool read_as_double, + int* processed_characters_count) const { + Iterator current = input; + Iterator end = input + length; + + *processed_characters_count = 0; + + const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0; + const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0; + const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0; + const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0; + + // To make sure that iterator dereferencing is valid the following + // convention is used: + // 1. Each '++current' statement is followed by check for equality to 'end'. + // 2. If AdvanceToNonspace returned false then current == end. + // 3. If 'current' becomes equal to 'end' the function returns or goes to + // 'parsing_done'. + // 4. 'current' is not dereferenced after the 'parsing_done' label. + // 5. Code before 'parsing_done' may rely on 'current != end'. + if (current == end) return empty_string_value_; + + if (allow_leading_spaces || allow_trailing_spaces) { + if (!AdvanceToNonspace(&current, end)) { + *processed_characters_count = static_cast<int>(current - input); + return empty_string_value_; + } + if (!allow_leading_spaces && (input != current)) { + // No leading spaces allowed, but AdvanceToNonspace moved forward. 
+ return junk_string_value_; + } + } + + // The longest form of simplified number is: "-<significant digits>.1eXXX\0". + const int kBufferSize = kMaxSignificantDigits + 10; + char buffer[kBufferSize]; // NOLINT: size is known at compile time. + int buffer_pos = 0; + + // Exponent will be adjusted if insignificant digits of the integer part + // or insignificant leading zeros of the fractional part are dropped. + int exponent = 0; + int significant_digits = 0; + int insignificant_digits = 0; + bool nonzero_digit_dropped = false; + + bool sign = false; + + if (*current == '+' || *current == '-') { + sign = (*current == '-'); + ++current; + Iterator next_non_space = current; + // Skip following spaces (if allowed). + if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_; + if (!allow_spaces_after_sign && (current != next_non_space)) { + return junk_string_value_; + } + current = next_non_space; + } + + if (infinity_symbol_ != NULL) { + if (*current == infinity_symbol_[0]) { + if (!ConsumeSubString(&current, end, infinity_symbol_)) { + return junk_string_value_; + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { + return junk_string_value_; + } + + ASSERT(buffer_pos == 0); + *processed_characters_count = static_cast<int>(current - input); + return sign ? -Double::Infinity() : Double::Infinity(); + } + } + + if (nan_symbol_ != NULL) { + if (*current == nan_symbol_[0]) { + if (!ConsumeSubString(&current, end, nan_symbol_)) { + return junk_string_value_; + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { + return junk_string_value_; + } + + ASSERT(buffer_pos == 0); + *processed_characters_count = static_cast<int>(current - input); + return sign ? 
-Double::NaN() : Double::NaN(); + } + } + + bool leading_zero = false; + if (*current == '0') { + ++current; + if (current == end) { + *processed_characters_count = static_cast<int>(current - input); + return SignedZero(sign); + } + + leading_zero = true; + + // It could be hexadecimal value. + if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) { + ++current; + if (current == end || !isDigit(*current, 16)) { + return junk_string_value_; // "0x". + } + + bool result_is_junk; + double result = RadixStringToIeee<4>(&current, + end, + sign, + allow_trailing_junk, + junk_string_value_, + read_as_double, + &result_is_junk); + if (!result_is_junk) { + if (allow_trailing_spaces) AdvanceToNonspace(&current, end); + *processed_characters_count = static_cast<int>(current - input); + } + return result; + } + + // Ignore leading zeros in the integer part. + while (*current == '0') { + ++current; + if (current == end) { + *processed_characters_count = static_cast<int>(current - input); + return SignedZero(sign); + } + } + } + + bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0; + + // Copy significant digits of the integer part (if any) to the buffer. + while (*current >= '0' && *current <= '9') { + if (significant_digits < kMaxSignificantDigits) { + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast<char>(*current); + significant_digits++; + // Will later check if it's an octal in the buffer. + } else { + insignificant_digits++; // Move the digit into the exponential part. 
+ nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; + } + octal = octal && *current < '8'; + ++current; + if (current == end) goto parsing_done; + } + + if (significant_digits == 0) { + octal = false; + } + + if (*current == '.') { + if (octal && !allow_trailing_junk) return junk_string_value_; + if (octal) goto parsing_done; + + ++current; + if (current == end) { + if (significant_digits == 0 && !leading_zero) { + return junk_string_value_; + } else { + goto parsing_done; + } + } + + if (significant_digits == 0) { + // octal = false; + // Integer part consists of 0 or is absent. Significant digits start after + // leading zeros (if any). + while (*current == '0') { + ++current; + if (current == end) { + *processed_characters_count = static_cast<int>(current - input); + return SignedZero(sign); + } + exponent--; // Move this 0 into the exponent. + } + } + + // There is a fractional part. + // We don't emit a '.', but adjust the exponent instead. + while (*current >= '0' && *current <= '9') { + if (significant_digits < kMaxSignificantDigits) { + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos++] = static_cast<char>(*current); + significant_digits++; + exponent--; + } else { + // Ignore insignificant digits in the fractional part. + nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; + } + ++current; + if (current == end) goto parsing_done; + } + } + + if (!leading_zero && exponent == 0 && significant_digits == 0) { + // If leading_zeros is true then the string contains zeros. + // If exponent < 0 then string was [+-]\.0*... + // If significant_digits != 0 the string is not equal to 0. + // Otherwise there are no digits in the string. + return junk_string_value_; + } + + // Parse exponential part. 
+ if (*current == 'e' || *current == 'E') { + if (octal && !allow_trailing_junk) return junk_string_value_; + if (octal) goto parsing_done; + ++current; + if (current == end) { + if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + char exponen_sign = '+'; + if (*current == '+' || *current == '-') { + exponen_sign = static_cast<char>(*current); + ++current; + if (current == end) { + if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + } + + if (current == end || *current < '0' || *current > '9') { + if (allow_trailing_junk) { + goto parsing_done; + } else { + return junk_string_value_; + } + } + + const int max_exponent = INT_MAX / 2; + ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2); + int num = 0; + do { + // Check overflow. + int digit = *current - '0'; + if (num >= max_exponent / 10 + && !(num == max_exponent / 10 && digit <= max_exponent % 10)) { + num = max_exponent; + } else { + num = num * 10 + digit; + } + ++current; + } while (current != end && *current >= '0' && *current <= '9'); + + exponent += (exponen_sign == '-' ? 
-num : num); + } + + if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { + return junk_string_value_; + } + if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { + return junk_string_value_; + } + if (allow_trailing_spaces) { + AdvanceToNonspace(&current, end); + } + + parsing_done: + exponent += insignificant_digits; + + if (octal) { + double result; + bool result_is_junk; + char* start = buffer; + result = RadixStringToIeee<3>(&start, + buffer + buffer_pos, + sign, + allow_trailing_junk, + junk_string_value_, + read_as_double, + &result_is_junk); + ASSERT(!result_is_junk); + *processed_characters_count = static_cast<int>(current - input); + return result; + } + + if (nonzero_digit_dropped) { + buffer[buffer_pos++] = '1'; + exponent--; + } + + ASSERT(buffer_pos < kBufferSize); + buffer[buffer_pos] = '\0'; + + double converted; + if (read_as_double) { + converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent); + } else { + converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent); + } + *processed_characters_count = static_cast<int>(current - input); + return sign? 
-converted: converted; +} + + +double StringToDoubleConverter::StringToDouble( + const char* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +double StringToDoubleConverter::StringToDouble( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return StringToIeee(buffer, length, true, processed_characters_count); +} + + +float StringToDoubleConverter::StringToFloat( + const char* buffer, + int length, + int* processed_characters_count) const { + return static_cast<float>(StringToIeee(buffer, length, false, + processed_characters_count)); +} + + +float StringToDoubleConverter::StringToFloat( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return static_cast<float>(StringToIeee(buffer, length, false, + processed_characters_count)); +} +#endif // not needed for ICU + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/double-conversion.h b/deps/icu-small/source/i18n/double-conversion.h new file mode 100644 index 0000000000..0939412734 --- /dev/null +++ b/deps/icu-small/source/i18n/double-conversion.h @@ -0,0 +1,566 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// From the double-conversion library. Original license: +// +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING +#include "unicode/utypes.h" +#if !UCONFIG_NO_FORMATTING + +#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ +#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ + +// ICU PATCH: Customize header file paths for ICU. + +#include "double-conversion-utils.h" + +// ICU PATCH: Wrap in ICU namespace +U_NAMESPACE_BEGIN + +namespace double_conversion { + +class DoubleToStringConverter { + public: +#if 0 // not needed for ICU + // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint + // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the + // function returns false. 
+ static const int kMaxFixedDigitsBeforePoint = 60; + static const int kMaxFixedDigitsAfterPoint = 60; + + // When calling ToExponential with a requested_digits + // parameter > kMaxExponentialDigits then the function returns false. + static const int kMaxExponentialDigits = 120; + + // When calling ToPrecision with a requested_digits + // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits + // then the function returns false. + static const int kMinPrecisionDigits = 1; + static const int kMaxPrecisionDigits = 120; + + enum Flags { + NO_FLAGS = 0, + EMIT_POSITIVE_EXPONENT_SIGN = 1, + EMIT_TRAILING_DECIMAL_POINT = 2, + EMIT_TRAILING_ZERO_AFTER_POINT = 4, + UNIQUE_ZERO = 8 + }; + + // Flags should be a bit-or combination of the possible Flags-enum. + // - NO_FLAGS: no special flags. + // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent + // form, emits a '+' for positive exponents. Example: 1.2e+2. + // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is + // converted into decimal format then a trailing decimal point is appended. + // Example: 2345.0 is converted to "2345.". + // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point + // emits a trailing '0'-character. This flag requires the + // EXMIT_TRAILING_DECIMAL_POINT flag. + // Example: 2345.0 is converted to "2345.0". + // - UNIQUE_ZERO: "-0.0" is converted to "0.0". + // + // Infinity symbol and nan_symbol provide the string representation for these + // special values. If the string is NULL and the special value is encountered + // then the conversion functions return false. + // + // The exponent_character is used in exponential representations. It is + // usually 'e' or 'E'. 
+ // + // When converting to the shortest representation the converter will + // represent input numbers in decimal format if they are in the interval + // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[ + // (lower boundary included, greater boundary excluded). + // Example: with decimal_in_shortest_low = -6 and + // decimal_in_shortest_high = 21: + // ToShortest(0.000001) -> "0.000001" + // ToShortest(0.0000001) -> "1e-7" + // ToShortest(111111111111111111111.0) -> "111111111111111110000" + // ToShortest(100000000000000000000.0) -> "100000000000000000000" + // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" + // + // When converting to precision mode the converter may add + // max_leading_padding_zeroes before returning the number in exponential + // format. + // Example with max_leading_padding_zeroes_in_precision_mode = 6. + // ToPrecision(0.0000012345, 2) -> "0.0000012" + // ToPrecision(0.00000012345, 2) -> "1.2e-7" + // Similarily the converter may add up to + // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid + // returning an exponential representation. A zero added by the + // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: + // ToPrecision(230.0, 2) -> "230" + // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. + // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. 
+ DoubleToStringConverter(int flags, + const char* infinity_symbol, + const char* nan_symbol, + char exponent_character, + int decimal_in_shortest_low, + int decimal_in_shortest_high, + int max_leading_padding_zeroes_in_precision_mode, + int max_trailing_padding_zeroes_in_precision_mode) + : flags_(flags), + infinity_symbol_(infinity_symbol), + nan_symbol_(nan_symbol), + exponent_character_(exponent_character), + decimal_in_shortest_low_(decimal_in_shortest_low), + decimal_in_shortest_high_(decimal_in_shortest_high), + max_leading_padding_zeroes_in_precision_mode_( + max_leading_padding_zeroes_in_precision_mode), + max_trailing_padding_zeroes_in_precision_mode_( + max_trailing_padding_zeroes_in_precision_mode) { + // When 'trailing zero after the point' is set, then 'trailing point' + // must be set too. + ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) || + !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0)); + } + + // Returns a converter following the EcmaScript specification. + static const DoubleToStringConverter& EcmaScriptConverter(); + + // Computes the shortest string of digits that correctly represent the input + // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high + // (see constructor) it then either returns a decimal representation, or an + // exponential representation. + // Example with decimal_in_shortest_low = -6, + // decimal_in_shortest_high = 21, + // EMIT_POSITIVE_EXPONENT_SIGN activated, and + // EMIT_TRAILING_DECIMAL_POINT deactived: + // ToShortest(0.000001) -> "0.000001" + // ToShortest(0.0000001) -> "1e-7" + // ToShortest(111111111111111111111.0) -> "111111111111111110000" + // ToShortest(100000000000000000000.0) -> "100000000000000000000" + // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" + // + // Note: the conversion may round the output if the returned string + // is accurate enough to uniquely identify the input-number. 
+ // For example the most precise representation of the double 9e59 equals + // "899999999999999918767229449717619953810131273674690656206848", but + // the converter will return the shorter (but still correct) "9e59". + // + // Returns true if the conversion succeeds. The conversion always succeeds + // except when the input value is special and no infinity_symbol or + // nan_symbol has been given to the constructor. + bool ToShortest(double value, StringBuilder* result_builder) const { + return ToShortestIeeeNumber(value, result_builder, SHORTEST); + } + + // Same as ToShortest, but for single-precision floats. + bool ToShortestSingle(float value, StringBuilder* result_builder) const { + return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE); + } + + + // Computes a decimal representation with a fixed number of digits after the + // decimal point. The last emitted digit is rounded. + // + // Examples: + // ToFixed(3.12, 1) -> "3.1" + // ToFixed(3.1415, 3) -> "3.142" + // ToFixed(1234.56789, 4) -> "1234.5679" + // ToFixed(1.23, 5) -> "1.23000" + // ToFixed(0.1, 4) -> "0.1000" + // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00" + // ToFixed(0.1, 30) -> "0.100000000000000005551115123126" + // ToFixed(0.1, 17) -> "0.10000000000000001" + // + // If requested_digits equals 0, then the tail of the result depends on + // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT. + // Examples, for requested_digits == 0, + // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be + // - false and false: then 123.45 -> 123 + // 0.678 -> 1 + // - true and false: then 123.45 -> 123. + // 0.678 -> 1. + // - true and true: then 123.45 -> 123.0 + // 0.678 -> 1.0 + // + // Returns true if the conversion succeeds. 
The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - 'value' > 10^kMaxFixedDigitsBeforePoint, or + // - 'requested_digits' > kMaxFixedDigitsAfterPoint. + // The last two conditions imply that the result will never contain more than + // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters + // (one additional character for the sign, and one for the decimal point). + bool ToFixed(double value, + int requested_digits, + StringBuilder* result_builder) const; + + // Computes a representation in exponential format with requested_digits + // after the decimal point. The last emitted digit is rounded. + // If requested_digits equals -1, then the shortest exponential representation + // is computed. + // + // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and + // exponent_character set to 'e'. + // ToExponential(3.12, 1) -> "3.1e0" + // ToExponential(5.0, 3) -> "5.000e0" + // ToExponential(0.001, 2) -> "1.00e-3" + // ToExponential(3.1415, -1) -> "3.1415e0" + // ToExponential(3.1415, 4) -> "3.1415e0" + // ToExponential(3.1415, 3) -> "3.142e0" + // ToExponential(123456789000000, 3) -> "1.235e14" + // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30" + // ToExponential(1000000000000000019884624838656.0, 32) -> + // "1.00000000000000001988462483865600e30" + // ToExponential(1234, 0) -> "1e3" + // + // Returns true if the conversion succeeds. The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - 'requested_digits' > kMaxExponentialDigits. 
+ // The last condition implies that the result will never contain more than + // kMaxExponentialDigits + 8 characters (the sign, the digit before the + // decimal point, the decimal point, the exponent character, the + // exponent's sign, and at most 3 exponent digits). + bool ToExponential(double value, + int requested_digits, + StringBuilder* result_builder) const; + + // Computes 'precision' leading digits of the given 'value' and returns them + // either in exponential or decimal format, depending on + // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the + // constructor). + // The last computed digit is rounded. + // + // Example with max_leading_padding_zeroes_in_precision_mode = 6. + // ToPrecision(0.0000012345, 2) -> "0.0000012" + // ToPrecision(0.00000012345, 2) -> "1.2e-7" + // Similarily the converter may add up to + // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid + // returning an exponential representation. A zero added by the + // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: + // ToPrecision(230.0, 2) -> "230" + // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. + // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. + // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no + // EMIT_TRAILING_ZERO_AFTER_POINT: + // ToPrecision(123450.0, 6) -> "123450" + // ToPrecision(123450.0, 5) -> "123450" + // ToPrecision(123450.0, 4) -> "123500" + // ToPrecision(123450.0, 3) -> "123000" + // ToPrecision(123450.0, 2) -> "1.2e5" + // + // Returns true if the conversion succeeds. 
The conversion always succeeds + // except for the following cases: + // - the input value is special and no infinity_symbol or nan_symbol has + // been provided to the constructor, + // - precision < kMinPericisionDigits + // - precision > kMaxPrecisionDigits + // The last condition implies that the result will never contain more than + // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the + // exponent character, the exponent's sign, and at most 3 exponent digits). + bool ToPrecision(double value, + int precision, + StringBuilder* result_builder) const; +#endif // not needed for ICU + + enum DtoaMode { + // Produce the shortest correct representation. + // For example the output of 0.299999999999999988897 is (the less accurate + // but correct) 0.3. + SHORTEST, + // Same as SHORTEST, but for single-precision floats. + SHORTEST_SINGLE, + // Produce a fixed number of digits after the decimal point. + // For instance fixed(0.1, 4) becomes 0.1000 + // If the input number is big, the output will be big. + FIXED, + // Fixed number of digits (independent of the decimal point). + PRECISION + }; + + // The maximal number of digits that are needed to emit a double in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any double will never use more digits than + // kBase10MaximalLength. + // Note that DoubleToAscii null-terminates its input. So the given buffer + // should be at least kBase10MaximalLength + 1 characters long. + static const int kBase10MaximalLength = 17; + + // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or + // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v' + // after it has been casted to a single-precision float. That is, in this + // mode static_cast<float>(v) must not be NaN, +Infinity or -Infinity. + // + // The result should be interpreted as buffer * 10^(point-length). 
+ // + // The output depends on the given mode: + // - SHORTEST: produce the least amount of digits for which the internal + // identity requirement is still satisfied. If the digits are printed + // (together with the correct exponent) then reading this number will give + // 'v' again. The buffer will choose the representation that is closest to + // 'v'. If there are two at the same distance, than the one farther away + // from 0 is chosen (halfway cases - ending with 5 - are rounded up). + // In this mode the 'requested_digits' parameter is ignored. + // - SHORTEST_SINGLE: same as SHORTEST but with single-precision. + // - FIXED: produces digits necessary to print a given number with + // 'requested_digits' digits after the decimal point. The produced digits + // might be too short in which case the caller has to fill the remainder + // with '0's. + // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. + // Halfway cases are rounded towards +/-Infinity (away from 0). The call + // toFixed(0.15, 2) thus returns buffer="2", point=0. + // The returned buffer may contain digits that would be truncated from the + // shortest representation of the input. + // - PRECISION: produces 'requested_digits' where the first digit is not '0'. + // Even though the length of produced digits usually equals + // 'requested_digits', the function is allowed to return fewer digits, in + // which case the caller has to fill the missing digits with '0's. + // Halfway cases are again rounded away from 0. + // DoubleToAscii expects the given buffer to be big enough to hold all + // digits and a terminating null-character. In SHORTEST-mode it expects a + // buffer of at least kBase10MaximalLength + 1. In all other modes the + // requested_digits parameter and the padding-zeroes limit the size of the + // output. Don't forget the decimal point, the exponent character and the + // terminating null-character when computing the maximal output size. 
+ // The given length is only used in debug mode to ensure the buffer is big + // enough. + // ICU PATCH: Export this as U_I18N_API for unit tests. + static void U_I18N_API DoubleToAscii(double v, + DtoaMode mode, + int requested_digits, + char* buffer, + int buffer_length, + bool* sign, + int* length, + int* point); + +#if 0 // not needed for ICU + private: + // Implementation for ToShortest and ToShortestSingle. + bool ToShortestIeeeNumber(double value, + StringBuilder* result_builder, + DtoaMode mode) const; + + // If the value is a special value (NaN or Infinity) constructs the + // corresponding string using the configured infinity/nan-symbol. + // If either of them is NULL or the value is not special then the + // function returns false. + bool HandleSpecialValues(double value, StringBuilder* result_builder) const; + // Constructs an exponential representation (i.e. 1.234e56). + // The given exponent assumes a decimal point after the first decimal digit. + void CreateExponentialRepresentation(const char* decimal_digits, + int length, + int exponent, + StringBuilder* result_builder) const; + // Creates a decimal representation (i.e 1234.5678). + void CreateDecimalRepresentation(const char* decimal_digits, + int length, + int decimal_point, + int digits_after_point, + StringBuilder* result_builder) const; + + const int flags_; + const char* const infinity_symbol_; + const char* const nan_symbol_; + const char exponent_character_; + const int decimal_in_shortest_low_; + const int decimal_in_shortest_high_; + const int max_leading_padding_zeroes_in_precision_mode_; + const int max_trailing_padding_zeroes_in_precision_mode_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter); +}; + + +class StringToDoubleConverter { + public: + // Enumeration for allowing octals and ignoring junk when converting + // strings to numbers. 
+ enum Flags { + NO_FLAGS = 0, + ALLOW_HEX = 1, + ALLOW_OCTALS = 2, + ALLOW_TRAILING_JUNK = 4, + ALLOW_LEADING_SPACES = 8, + ALLOW_TRAILING_SPACES = 16, + ALLOW_SPACES_AFTER_SIGN = 32 + }; + + // Flags should be a bit-or combination of the possible Flags-enum. + // - NO_FLAGS: no special flags. + // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers. + // Ex: StringToDouble("0x1234") -> 4660.0 + // In StringToDouble("0x1234.56") the characters ".56" are trailing + // junk. The result of the call is hence dependent on + // the ALLOW_TRAILING_JUNK flag and/or the junk value. + // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK, + // the string will not be parsed as "0" followed by junk. + // + // - ALLOW_OCTALS: recognizes the prefix "0" for octals: + // If a sequence of octal digits starts with '0', then the number is + // read as octal integer. Octal numbers may only be integers. + // Ex: StringToDouble("01234") -> 668.0 + // StringToDouble("012349") -> 12349.0 // Not a sequence of octal + // // digits. + // In StringToDouble("01234.56") the characters ".56" are trailing + // junk. The result of the call is hence dependent on + // the ALLOW_TRAILING_JUNK flag and/or the junk value. + // In StringToDouble("01234e56") the characters "e56" are trailing + // junk, too. + // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of + // a double literal. + // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces, + // new-lines, and tabs. + // - ALLOW_TRAILING_SPACES: ignore trailing whitespace. + // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign. + // Ex: StringToDouble("- 123.2") -> -123.2. + // StringToDouble("+ 123.2") -> 123.2 + // + // empty_string_value is returned when an empty string is given as input. + // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string + // containing only spaces is converted to the 'empty_string_value', too. 
+ // + // junk_string_value is returned when + // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not + // part of a double-literal) is found. + // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a + // double literal. + // + // infinity_symbol and nan_symbol are strings that are used to detect + // inputs that represent infinity and NaN. They can be null, in which case + // they are ignored. + // The conversion routine first reads any possible signs. Then it compares the + // following character of the input-string with the first character of + // the infinity, and nan-symbol. If either matches, the function assumes, that + // a match has been found, and expects the following input characters to match + // the remaining characters of the special-value symbol. + // This means that the following restrictions apply to special-value symbols: + // - they must not start with signs ('+', or '-'), + // - they must not have the same first character. + // - they must not start with digits. + // + // Examples: + // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK, + // empty_string_value = 0.0, + // junk_string_value = NaN, + // infinity_symbol = "infinity", + // nan_symbol = "nan": + // StringToDouble("0x1234") -> 4660.0. + // StringToDouble("0x1234K") -> 4660.0. + // StringToDouble("") -> 0.0 // empty_string_value. + // StringToDouble(" ") -> NaN // junk_string_value. + // StringToDouble(" 1") -> NaN // junk_string_value. + // StringToDouble("0x") -> NaN // junk_string_value. + // StringToDouble("-123.45") -> -123.45. + // StringToDouble("--123.45") -> NaN // junk_string_value. + // StringToDouble("123e45") -> 123e45. + // StringToDouble("123E45") -> 123e45. + // StringToDouble("123e+45") -> 123e45. + // StringToDouble("123E-45") -> 123e-45. + // StringToDouble("123e") -> 123.0 // trailing junk ignored. + // StringToDouble("123e-") -> 123.0 // trailing junk ignored. + // StringToDouble("+NaN") -> NaN // NaN string literal. 
+ // StringToDouble("-infinity") -> -inf. // infinity literal. + // StringToDouble("Infinity") -> NaN // junk_string_value. + // + // flags = ALLOW_OCTALS | ALLOW_LEADING_SPACES, + // empty_string_value = 0.0, + // junk_string_value = NaN, + // infinity_symbol = NULL, + // nan_symbol = NULL: + // StringToDouble("0x1234") -> NaN // junk_string_value. + // StringToDouble("01234") -> 668.0. + // StringToDouble("") -> 0.0 // empty_string_value. + // StringToDouble(" ") -> 0.0 // empty_string_value. + // StringToDouble(" 1") -> 1.0 + // StringToDouble("0x") -> NaN // junk_string_value. + // StringToDouble("0123e45") -> NaN // junk_string_value. + // StringToDouble("01239E45") -> 1239e45. + // StringToDouble("-infinity") -> NaN // junk_string_value. + // StringToDouble("NaN") -> NaN // junk_string_value. + StringToDoubleConverter(int flags, + double empty_string_value, + double junk_string_value, + const char* infinity_symbol, + const char* nan_symbol) + : flags_(flags), + empty_string_value_(empty_string_value), + junk_string_value_(junk_string_value), + infinity_symbol_(infinity_symbol), + nan_symbol_(nan_symbol) { + } + + // Performs the conversion. + // The output parameter 'processed_characters_count' is set to the number + // of characters that have been processed to read the number. + // Spaces that are processed with ALLOW_{LEADING|TRAILING}_SPACES are included + // in the 'processed_characters_count'. Trailing junk is never included. + double StringToDouble(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToDouble above but for 16 bit characters. + double StringToDouble(const uc16* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToDouble but reads a float. + // Note that this is not equivalent to static_cast<float>(StringToDouble(...)) + // due to potential double-rounding. 
+ float StringToFloat(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringToFloat above but for 16 bit characters. + float StringToFloat(const uc16* buffer, + int length, + int* processed_characters_count) const; + + private: + const int flags_; + const double empty_string_value_; + const double junk_string_value_; + const char* const infinity_symbol_; + const char* const nan_symbol_; + + template <class Iterator> + double StringToIeee(Iterator start_pointer, + int length, + bool read_as_double, + int* processed_characters_count) const; + + DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter); +#endif // not needed for ICU +}; + +} // namespace double_conversion + +// ICU PATCH: Close ICU namespace +U_NAMESPACE_END + +#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ +#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/dtptngen.cpp b/deps/icu-small/source/i18n/dtptngen.cpp index 187342e4af..aefd70464e 100644 --- a/deps/icu-small/source/i18n/dtptngen.cpp +++ b/deps/icu-small/source/i18n/dtptngen.cpp @@ -261,12 +261,21 @@ static const char* const CLDR_FIELD_APPEND[] = { "Hour", "Minute", "Second", "*", "Timezone" }; -static const char* const CLDR_FIELD_NAME[] = { +static const char* const CLDR_FIELD_NAME[UDATPG_FIELD_COUNT] = { "era", "year", "quarter", "month", "week", "weekOfMonth", "weekday", "dayOfYear", "weekdayOfMonth", "day", "dayperiod", // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J "hour", "minute", "second", "*", "zone" }; +static const char* const CLDR_FIELD_WIDTH[] = { // [UDATPG_WIDTH_COUNT] + "", "-short", "-narrow" +}; + +// TODO(ticket:13619): remove when definition uncommented in dtptngen.h. 
+static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; +static constexpr UDateTimePGDisplayWidth UDATPG_WIDTH_APPENDITEM = UDATPG_WIDE; +static constexpr int32_t UDATPG_FIELD_KEY_MAX = 24; // max length of CLDR field tag (type + width) + // For appendItems static const UChar UDATPG_ItemFormat[]= {0x7B, 0x30, 0x7D, 0x20, 0x251C, 0x7B, 0x32, 0x7D, 0x3A, 0x20, 0x7B, 0x31, 0x7D, 0x2524, 0}; // {0} \u251C{2}: {1}\u2524 @@ -379,10 +388,11 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { } for (int32_t i=0; i< UDATPG_FIELD_COUNT; ++i ) { appendItemFormats[i] = other.appendItemFormats[i]; - appendItemNames[i] = other.appendItemNames[i]; - // NUL-terminate for the C API. - appendItemFormats[i].getTerminatedBuffer(); - appendItemNames[i].getTerminatedBuffer(); + appendItemFormats[i].getTerminatedBuffer(); // NUL-terminate for the C API. + for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) { + fieldDisplayNames[i][j] = other.fieldDisplayNames[i][j]; + fieldDisplayNames[i][j].getTerminatedBuffer(); // NUL-terminate for the C API. 
+ } } UErrorCode status = U_ZERO_ERROR; patternMap->copyFrom(*other.patternMap, status); @@ -399,10 +409,14 @@ DateTimePatternGenerator::operator==(const DateTimePatternGenerator& other) cons if ((pLocale==other.pLocale) && (patternMap->equals(*other.patternMap)) && (dateTimeFormat==other.dateTimeFormat) && (decimal==other.decimal)) { for ( int32_t i=0 ; i<UDATPG_FIELD_COUNT; ++i ) { - if ((appendItemFormats[i] != other.appendItemFormats[i]) || - (appendItemNames[i] != other.appendItemNames[i]) ) { - return FALSE; - } + if (appendItemFormats[i] != other.appendItemFormats[i]) { + return FALSE; + } + for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) { + if (fieldDisplayNames[i][j] != other.fieldDisplayNames[i][j]) { + return FALSE; + } + } } return TRUE; } @@ -824,15 +838,16 @@ struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink { ResourceTable itemsTable = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - UDateTimePatternField field = dtpg.getAppendNameNumber(key); + UDateTimePGDisplayWidth width; + UDateTimePatternField field = dtpg.getFieldAndWidthIndices(key, &width); if (field == UDATPG_FIELD_COUNT) { continue; } ResourceTable detailsTable = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t j = 0; detailsTable.getKeyAndValue(j, key, value); ++j) { if (uprv_strcmp(key, "dn") != 0) { continue; } const UnicodeString& valueStr = value.getUnicodeString(errorCode); - if (dtpg.getAppendItemName(field).isEmpty() && !valueStr.isEmpty()) { - dtpg.setAppendItemName(field, valueStr); + if (dtpg.getFieldDisplayName(field,width).isEmpty() && !valueStr.isEmpty()) { + dtpg.setFieldDisplayName(field,width,valueStr); } break; } @@ -841,8 +856,7 @@ struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink { void fillInMissing() { for (int32_t i = 0; i < UDATPG_FIELD_COUNT; i++) { - UDateTimePatternField field = 
(UDateTimePatternField)i; - UnicodeString& valueStr = dtpg.getMutableAppendItemName(field); + UnicodeString& valueStr = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, UDATPG_WIDE); if (valueStr.isEmpty()) { valueStr = CAP_F; U_ASSERT(i < 20); @@ -857,6 +871,12 @@ struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink { // NUL-terminate for the C API. valueStr.getTerminatedBuffer(); } + for (int32_t j = 1; j < UDATPG_WIDTH_COUNT; j++) { + UnicodeString& valueStr = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)j); + if (valueStr.isEmpty()) { + valueStr = dtpg.getFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)(j-1)); + } + } } } }; @@ -969,25 +989,35 @@ DateTimePatternGenerator::getAppendItemFormat(UDateTimePatternField field) const void DateTimePatternGenerator::setAppendItemName(UDateTimePatternField field, const UnicodeString& value) { - appendItemNames[field] = value; - // NUL-terminate for the C API. - appendItemNames[field].getTerminatedBuffer(); + setFieldDisplayName(field, UDATPG_WIDTH_APPENDITEM, value); } const UnicodeString& DateTimePatternGenerator::getAppendItemName(UDateTimePatternField field) const { - return appendItemNames[field]; + return fieldDisplayNames[field][UDATPG_WIDTH_APPENDITEM]; +} + +void +DateTimePatternGenerator::setFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width, const UnicodeString& value) { + fieldDisplayNames[field][width] = value; + // NUL-terminate for the C API. 
+ fieldDisplayNames[field][width].getTerminatedBuffer(); +} + +UnicodeString +DateTimePatternGenerator::getFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) const { + return fieldDisplayNames[field][width]; } UnicodeString& -DateTimePatternGenerator::getMutableAppendItemName(UDateTimePatternField field) { - return appendItemNames[field]; +DateTimePatternGenerator::getMutableFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) { + return fieldDisplayNames[field][width]; } void DateTimePatternGenerator::getAppendName(UDateTimePatternField field, UnicodeString& value) { value = SINGLE_QUOTE; - value += appendItemNames[field]; + value += fieldDisplayNames[field][UDATPG_WIDTH_APPENDITEM]; value += SINGLE_QUOTE; } @@ -1312,9 +1342,23 @@ DateTimePatternGenerator::getAppendFormatNumber(const char* field) const { } UDateTimePatternField -DateTimePatternGenerator::getAppendNameNumber(const char* field) const { +DateTimePatternGenerator::getFieldAndWidthIndices(const char* key, UDateTimePGDisplayWidth* widthP) const { + char cldrFieldKey[UDATPG_FIELD_KEY_MAX + 1]; + uprv_strncpy(cldrFieldKey, key, UDATPG_FIELD_KEY_MAX); + cldrFieldKey[UDATPG_FIELD_KEY_MAX]=0; // ensure termination + *widthP = UDATPG_WIDE; + char* hyphenPtr = uprv_strchr(cldrFieldKey, '-'); + if (hyphenPtr) { + for (int32_t i=UDATPG_WIDTH_COUNT-1; i>0; --i) { + if (uprv_strcmp(CLDR_FIELD_WIDTH[i], hyphenPtr)==0) { + *widthP=(UDateTimePGDisplayWidth)i; + break; + } + } + *hyphenPtr = 0; // now delete width portion of key + } for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i ) { - if (uprv_strcmp(CLDR_FIELD_NAME[i],field)==0) { + if (uprv_strcmp(CLDR_FIELD_NAME[i],cldrFieldKey)==0) { return (UDateTimePatternField)i; } } diff --git a/deps/icu-small/source/i18n/islamcal.cpp b/deps/icu-small/source/i18n/islamcal.cpp index 733301d98e..11615a1e51 100644 --- a/deps/icu-small/source/i18n/islamcal.cpp +++ b/deps/icu-small/source/i18n/islamcal.cpp @@ -614,7 +614,7 @@ void 
IslamicCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status) days = julianDay - ASTRONOMICAL_EPOC; } // Use the civil calendar approximation, which is just arithmetic - year = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 ); + year = (int32_t)ClockMath::floorDivide(30 * (int64_t)days + 10646, (int64_t)10631); month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 ); month = month<11?month:11; startDate = monthStart(year, month); diff --git a/deps/icu-small/source/i18n/measfmt.cpp b/deps/icu-small/source/i18n/measfmt.cpp index 628c8f8992..996a20c2e0 100644 --- a/deps/icu-small/source/i18n/measfmt.cpp +++ b/deps/icu-small/source/i18n/measfmt.cpp @@ -764,10 +764,11 @@ UnicodeString &MeasureFormat::formatMeasurePerUnit( if (U_FAILURE(status)) { return appendTo; } - MeasureUnit *resolvedUnit = - MeasureUnit::resolveUnitPerUnit(measure.getUnit(), perUnit); - if (resolvedUnit != NULL) { - Measure newMeasure(measure.getNumber(), resolvedUnit, status); + bool isResolved = false; + MeasureUnit resolvedUnit = + MeasureUnit::resolveUnitPerUnit(measure.getUnit(), perUnit, &isResolved); + if (isResolved) { + Measure newMeasure(measure.getNumber(), new MeasureUnit(resolvedUnit), status); return formatMeasure( newMeasure, **numberFormat, appendTo, pos, status); } @@ -1061,9 +1062,13 @@ UnicodeString &MeasureFormat::formatNumeric( } // Format time. draft becomes something like '5:30:45' + // #13606: DateFormat is not thread-safe, but MeasureFormat advertises itself as thread-safe. 
FieldPosition smallestFieldPosition(smallestField); UnicodeString draft; + static UMutex dateFmtMutex = U_MUTEX_INITIALIZER; + umtx_lock(&dateFmtMutex); dateFmt.format(date, draft, smallestFieldPosition, status); + umtx_unlock(&dateFmtMutex); // If we find field for smallest amount replace it with the formatted // smallest amount from above taking care to replace the integer part diff --git a/deps/icu-small/source/i18n/measunit.cpp b/deps/icu-small/source/i18n/measunit.cpp index 580afc0df5..e21afcba02 100644 --- a/deps/icu-small/source/i18n/measunit.cpp +++ b/deps/icu-small/source/i18n/measunit.cpp @@ -1211,8 +1211,8 @@ int32_t MeasureUnit::internalGetIndexForTypeAndSubtype(const char *type, const c return gIndexes[t] + st - gOffsets[t]; } -MeasureUnit *MeasureUnit::resolveUnitPerUnit( - const MeasureUnit &unit, const MeasureUnit &perUnit) { +MeasureUnit MeasureUnit::resolveUnitPerUnit( + const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) { int32_t unitOffset = unit.getOffset(); int32_t perUnitOffset = perUnit.getOffset(); @@ -1233,10 +1233,13 @@ MeasureUnit *MeasureUnit::resolveUnitPerUnit( } else { // We found a resolution for our unit / per-unit combo // return it. 
- return new MeasureUnit(midRow[2], midRow[3]); + *isResolved = true; + return MeasureUnit(midRow[2], midRow[3]); } } - return NULL; + + *isResolved = false; + return MeasureUnit(); } MeasureUnit *MeasureUnit::create(int typeId, int subTypeId, UErrorCode &status) { diff --git a/deps/icu-small/source/i18n/nfrs.cpp b/deps/icu-small/source/i18n/nfrs.cpp index b2d08889e6..659cfcbbf5 100644 --- a/deps/icu-small/source/i18n/nfrs.cpp +++ b/deps/icu-small/source/i18n/nfrs.cpp @@ -681,7 +681,7 @@ static void dumpUS(FILE* f, const UnicodeString& us) { #endif UBool -NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const +NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { // try matching each rule in the rule set against the text being // parsed. Whichever one matches the most characters is the one @@ -707,9 +707,12 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun #endif // Try each of the negative rules, fraction rules, infinity rules and NaN rules for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { - if (nonNumericalRules[i]) { + if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) { + // Mark this rule as being executed so that we don't try to execute it again. 
+ nonNumericalExecutedRuleMask |= 1 << i; + Formattable tempResult; - UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); + UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult); if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { result = tempResult; highWaterMark = workingPos; @@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun continue; } Formattable tempResult; - UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); + UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult); if (success && workingPos.getIndex() > highWaterMark.getIndex()) { result = tempResult; highWaterMark = workingPos; diff --git a/deps/icu-small/source/i18n/nfrs.h b/deps/icu-small/source/i18n/nfrs.h index 34846ed297..c56fc07078 100644 --- a/deps/icu-small/source/i18n/nfrs.h +++ b/deps/icu-small/source/i18n/nfrs.h @@ -55,7 +55,7 @@ public: void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const; + UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; void appendRules(UnicodeString& result) const; // toString diff --git a/deps/icu-small/source/i18n/nfrule.cpp b/deps/icu-small/source/i18n/nfrule.cpp index 2c26aff2d1..9f5deb3168 100644 --- a/deps/icu-small/source/i18n/nfrule.cpp +++ b/deps/icu-small/source/i18n/nfrule.cpp @@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text, ParsePosition& parsePosition, UBool isFractionRule, double 
upperBound, + uint32_t nonNumericalExecutedRuleMask, Formattable& resVal) const { // internally we operate on a copy of the string being parsed @@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text, temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos); double partialResult = matchToDelimiter(workText, start, tempBaseValue, temp, pp, sub1, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match (or were trying to match a @@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text, temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos); partialResult = matchToDelimiter(workText2, 0, partialResult, temp, pp2, sub2, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match on this second @@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + uint32_t nonNumericalExecutedRuleMask, double upperBound) const { UErrorCode status = U_ZERO_ERROR; @@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); // if the substitution could match all the text up to @@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); if (success && (tempPP.getIndex() != 0)) { // if there's a successful match (or it's a null diff --git a/deps/icu-small/source/i18n/nfrule.h b/deps/icu-small/source/i18n/nfrule.h index 21cdd24fbd..843a4a0762 100644 --- a/deps/icu-small/source/i18n/nfrule.h +++ b/deps/icu-small/source/i18n/nfrule.h @@ -74,6 +74,7 @@ public: ParsePosition& pos, UBool isFractional, double upperBound, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; UBool shouldRollBack(int64_t number) const; @@ -94,6 +95,7 @@ private: int32_t indexOfAnyRulePrefix() const; double matchToDelimiter(const UnicodeString& text, int32_t startPos, double 
baseValue, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + uint32_t nonNumericalExecutedRuleMask, double upperBound) const; void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const; diff --git a/deps/icu-small/source/i18n/nfsubs.cpp b/deps/icu-small/source/i18n/nfsubs.cpp index 1a0914152d..ea817453d8 100644 --- a/deps/icu-small/source/i18n/nfsubs.cpp +++ b/deps/icu-small/source/i18n/nfsubs.cpp @@ -155,6 +155,7 @@ public: double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { @@ -221,6 +222,7 @@ public: double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; } @@ -292,6 +294,7 @@ public: double baseValue, double upperBound, UBool /*lenientParse*/, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; } @@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { #ifdef RBNF_DEBUG @@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text, // on), then also try parsing the text using a default- // constructed NumberFormat if (ruleSet != NULL) { - ruleSet->parse(text, parsePosition, upperBound, result); + ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result); if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) { UErrorCode status = U_ZERO_ERROR; NumberFormat* fmt = NumberFormat::createInstance(status); @@ -931,18 +935,19 @@ 
ModulusSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { // if this isn't a >>> substitution, we can just use the // inherited parse() routine to do the parsing if (ruleToUse == NULL) { - return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result); + return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result); // but if it IS a >>> substitution, we have to do it here: we // use the specific rule's doParse() method, and then we have to // do some of the other work of NFRuleSet.parse() } else { - ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result); + ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result); if (parsePosition.getIndex() != 0) { UErrorCode status = U_ZERO_ERROR; @@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, double baseValue, double /*upperBound*/, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& resVal) const { // if we're not in byDigits mode, we can just use the inherited // doParse() if (!byDigits) { - return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal); + return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal); // if we ARE in byDigits mode, parse the text one digit at a time // using this substitution's owning rule set (we do this by setting @@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); Formattable temp; - getRuleSet()->parse(workText, workPos, 10, temp); + getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp); UErrorCode status = U_ZERO_ERROR; digit = 
temp.getLong(status); // digit = temp.getType() == Formattable::kLong ? @@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool /*lenientParse*/, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const { // we don't have to do anything special to do the parsing here, @@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); - getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all + getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all if (workPos.getIndex() == 0) { // we failed, either there were no more zeros, or the number was formatted with digits // either way, we're done @@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, } // we've parsed off the zeros, now let's parse the rest from our current position - NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result); + NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result); if (withZeros) { // any base value will do in this case. 
is there a way to diff --git a/deps/icu-small/source/i18n/nfsubs.h b/deps/icu-small/source/i18n/nfsubs.h index e77f7ada8c..08de06f2a3 100644 --- a/deps/icu-small/source/i18n/nfsubs.h +++ b/deps/icu-small/source/i18n/nfsubs.h @@ -191,6 +191,7 @@ public: double baseValue, double upperBound, UBool lenientParse, + uint32_t nonNumericalExecutedRuleMask, Formattable& result) const; /** diff --git a/deps/icu-small/source/i18n/number_affixutils.cpp b/deps/icu-small/source/i18n/number_affixutils.cpp index 4dfdbc7ab7..df4b267af5 100644 --- a/deps/icu-small/source/i18n/number_affixutils.cpp +++ b/deps/icu-small/source/i18n/number_affixutils.cpp @@ -70,6 +70,7 @@ int32_t AffixUtils::estimateLength(const CharSequence &patternString, UErrorCode case STATE_FIRST_QUOTE: case STATE_INSIDE_QUOTE: status = U_ILLEGAL_ARGUMENT_ERROR; + break; default: break; } diff --git a/deps/icu-small/source/i18n/number_compact.cpp b/deps/icu-small/source/i18n/number_compact.cpp index 8ceee1378b..cc0d8fd2a2 100644 --- a/deps/icu-small/source/i18n/number_compact.cpp +++ b/deps/icu-small/source/i18n/number_compact.cpp @@ -262,7 +262,6 @@ void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReferen buildReference.setPatternInfo(&patternInfo); info.mod = buildReference.createImmutable(status); if (U_FAILURE(status)) { return; } - info.numDigits = patternInfo.positive.integerTotal; info.patternString = patternString; } } @@ -286,7 +285,6 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr StandardPlural::Form plural = quantity.getStandardPlural(rules); const UChar *patternString = data.getPattern(magnitude, plural); - int numDigits = -1; if (patternString == nullptr) { // Use the default (non-compact) modifier. // No need to take any action. 
@@ -299,7 +297,6 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr const CompactModInfo &info = precomputedMods[i]; if (u_strcmp(patternString, info.patternString) == 0) { info.mod->applyToMicros(micros, quantity); - numDigits = info.numDigits; break; } } @@ -313,12 +310,8 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); static_cast<MutablePatternModifier*>(const_cast<Modifier*>(micros.modMiddle)) ->setPatternInfo(&patternInfo); - numDigits = patternInfo.positive.integerTotal; } - // FIXME: Deal with numDigits == 0 (Awaiting a test case) - (void)numDigits; - // We already performed rounding. Do not perform it again. micros.rounding = Rounder::constructPassThrough(); } diff --git a/deps/icu-small/source/i18n/number_compact.h b/deps/icu-small/source/i18n/number_compact.h index 2344abf535..f7adf36416 100644 --- a/deps/icu-small/source/i18n/number_compact.h +++ b/deps/icu-small/source/i18n/number_compact.h @@ -52,7 +52,6 @@ class CompactData : public MultiplierProducer { struct CompactModInfo { const ImmutablePatternModifier *mod; const UChar* patternString; - int32_t numDigits; }; class CompactHandler : public MicroPropsGenerator, public UMemory { diff --git a/deps/icu-small/source/i18n/number_decimalquantity.cpp b/deps/icu-small/source/i18n/number_decimalquantity.cpp index 7246357666..b68df26ba2 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.cpp +++ b/deps/icu-small/source/i18n/number_decimalquantity.cpp @@ -14,12 +14,15 @@ #include "decContext.h" #include "decNumber.h" #include "number_roundingutils.h" +#include "double-conversion.h" #include "unicode/plurrule.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; +using icu::double_conversion::DoubleToStringConverter; + namespace { int8_t NEGATIVE_FLAG = 1; @@ -265,6 +268,10 @@ bool DecimalQuantity::isNegative() 
const { return (flags & NEGATIVE_FLAG) != 0; } +int8_t DecimalQuantity::signum() const { + return isNegative() ? -1 : isZero() ? 0 : 1; +} + bool DecimalQuantity::isInfinite() const { return (flags & INFINITY_FLAG) != 0; } @@ -392,31 +399,27 @@ void DecimalQuantity::_setToDoubleFast(double n) { } void DecimalQuantity::convertToAccurateDouble() { - double n = origDouble; - U_ASSERT(n != 0); + U_ASSERT(origDouble != 0); int32_t delta = origDelta; - setBcdToZero(); - // Call the slow oracle function (Double.toString in Java, sprintf in C++). - // The <float.h> constant DBL_DIG defines a platform-specific number of digits in a double. - // However, this tends to be too low (see #11318). Instead, we always use 14 decimal places. - static constexpr size_t CAP = 1 + 14 + 8; // Extra space for '+', '.', e+NNN, and '\0' - char dstr[CAP]; - snprintf(dstr, CAP, "%+1.14e", n); - - // uprv_decNumberFromString() will parse the string expecting '.' as a - // decimal separator, however sprintf() can use ',' in certain locales. - // Overwrite a ',' with '.' here before proceeding. - char *decimalSeparator = strchr(dstr, ','); - if (decimalSeparator != nullptr) { - *decimalSeparator = '.'; - } - - StringPiece sp(dstr); - DecNumberWithStorage dn; - stringToDecNumber(dstr, dn); - _setToDecNumber(dn.getAlias()); + // Call the slow oracle function (Double.toString in Java, DoubleToAscii in C++). 
+ char buffer[DoubleToStringConverter::kBase10MaximalLength + 1]; + bool sign; // unused; always positive + int32_t length; + int32_t point; + DoubleToStringConverter::DoubleToAscii( + origDouble, + DoubleToStringConverter::DtoaMode::SHORTEST, + 0, + buffer, + sizeof(buffer), + &sign, + &length, + &point + ); + setBcdToZero(); + readDoubleConversionToBcd(buffer, length, point); scale += delta; explicitExactDouble = true; } @@ -833,6 +836,26 @@ void DecimalQuantity::readDecNumberToBcd(decNumber *dn) { precision = dn->digits; } +void DecimalQuantity::readDoubleConversionToBcd( + const char* buffer, int32_t length, int32_t point) { + // NOTE: Despite the fact that double-conversion's API is called + // "DoubleToAscii", they actually use '0' (as opposed to u8'0'). + if (length > 16) { + ensureCapacity(length); + for (int32_t i = 0; i < length; i++) { + fBCD.bcdBytes.ptr[i] = buffer[length-i-1] - '0'; + } + } else { + uint64_t result = 0L; + for (int32_t i = 0; i < length; i++) { + result |= static_cast<uint64_t>(buffer[length-i-1] - '0') << (4 * i); + } + fBCD.bcdLong = result; + } + scale = point - length; + precision = length; +} + void DecimalQuantity::compact() { if (usingBytes) { int32_t delta = 0; diff --git a/deps/icu-small/source/i18n/number_decimalquantity.h b/deps/icu-small/source/i18n/number_decimalquantity.h index ccb832623c..4309c3c638 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.h +++ b/deps/icu-small/source/i18n/number_decimalquantity.h @@ -115,6 +115,9 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { /** @return Whether the value represented by this {@link DecimalQuantity} is less than zero. */ bool isNegative() const; + /** @return -1 if the value is negative; 1 if positive; or 0 if zero. */ + int8_t signum() const; + /** @return Whether the value represented by this {@link DecimalQuantity} is infinite. 
*/ bool isInfinite() const U_OVERRIDE; @@ -395,6 +398,8 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { void readDecNumberToBcd(decNumber *dn); + void readDoubleConversionToBcd(const char* buffer, int32_t length, int32_t point); + void copyBcdFrom(const DecimalQuantity &other); /** diff --git a/deps/icu-small/source/i18n/number_decimfmtprops.h b/deps/icu-small/source/i18n/number_decimfmtprops.h index 3e25966b6f..96356cad45 100644 --- a/deps/icu-small/source/i18n/number_decimfmtprops.h +++ b/deps/icu-small/source/i18n/number_decimfmtprops.h @@ -19,8 +19,8 @@ U_NAMESPACE_BEGIN // Export an explicit template instantiation of the LocalPointer that is used as a // data member of CurrencyPluralInfoWrapper. -// (MSVC requires this, even though it should not be necessary.) -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= #pragma warning(suppress: 4661) template class U_I18N_API LocalPointerBase<CurrencyPluralInfo>; diff --git a/deps/icu-small/source/i18n/number_fluent.cpp b/deps/icu-small/source/i18n/number_fluent.cpp index 76c3a7ce5c..27113106c5 100644 --- a/deps/icu-small/source/i18n/number_fluent.cpp +++ b/deps/icu-small/source/i18n/number_fluent.cpp @@ -33,12 +33,13 @@ Derived NumberFormatterSettings<Derived>::unit(const icu::MeasureUnit &unit) con } template<typename Derived> -Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit) const { +Derived NumberFormatterSettings<Derived>::adoptUnit(icu::MeasureUnit *unit) const { Derived copy(*this); // Just copy the unit into the MacroProps by value, and delete it since we have ownership. // NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit. // TimeUnit may be affected, but TimeUnit is not as relevant to number formatting. 
if (unit != nullptr) { + // TODO: On nullptr, reset to default value? copy.fMacros.unit = *unit; delete unit; } @@ -46,6 +47,26 @@ Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit } template<typename Derived> +Derived NumberFormatterSettings<Derived>::perUnit(const icu::MeasureUnit &perUnit) const { + Derived copy(*this); + // See comments above about slicing. + copy.fMacros.perUnit = perUnit; + return copy; +} + +template<typename Derived> +Derived NumberFormatterSettings<Derived>::adoptPerUnit(icu::MeasureUnit *perUnit) const { + Derived copy(*this); + // See comments above about slicing and ownership. + if (perUnit != nullptr) { + // TODO: On nullptr, reset to default value? + copy.fMacros.perUnit = *perUnit; + delete perUnit; + } + return copy; +} + +template<typename Derived> Derived NumberFormatterSettings<Derived>::rounding(const Rounder &rounder) const { Derived copy(*this); // NOTE: Slicing is OK. @@ -54,9 +75,11 @@ Derived NumberFormatterSettings<Derived>::rounding(const Rounder &rounder) const } template<typename Derived> -Derived NumberFormatterSettings<Derived>::grouping(const Grouper &grouper) const { +Derived NumberFormatterSettings<Derived>::grouping(const UGroupingStrategy &strategy) const { Derived copy(*this); - copy.fMacros.grouper = grouper; + // NOTE: This is slightly different than how the setting is stored in Java + // because we want to put it on the stack. 
+ copy.fMacros.grouper = Grouper::forStrategy(strategy); return copy; } @@ -75,7 +98,7 @@ Derived NumberFormatterSettings<Derived>::symbols(const DecimalFormatSymbols &sy } template<typename Derived> -Derived NumberFormatterSettings<Derived>::adoptSymbols(const NumberingSystem *ns) const { +Derived NumberFormatterSettings<Derived>::adoptSymbols(NumberingSystem *ns) const { Derived copy(*this); copy.fMacros.symbols.setTo(ns); return copy; diff --git a/deps/icu-small/source/i18n/number_formatimpl.cpp b/deps/icu-small/source/i18n/number_formatimpl.cpp index 9986ce6d8c..bc96cb15da 100644 --- a/deps/icu-small/source/i18n/number_formatimpl.cpp +++ b/deps/icu-small/source/i18n/number_formatimpl.cpp @@ -17,6 +17,8 @@ #include "unicode/dcfmtsym.h" #include "number_scientific.h" #include "number_compact.h" +#include "uresimp.h" +#include "ureslocs.h" using namespace icu; using namespace icu::number; @@ -88,6 +90,37 @@ const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, Cld return pattern; } +struct CurrencyFormatInfoResult { + bool exists; + const char16_t* pattern; + const char16_t* decimalSeparator; + const char16_t* groupingSeparator; +}; +CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) { + // TODO: Load this data in a centralized location like ICU4J? + // TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up. 
+ CurrencyFormatInfoResult result = { false, nullptr, nullptr, nullptr }; + if (U_FAILURE(status)) return result; + CharString key; + key.append("Currencies/", status); + key.append(isoCode, status); + UErrorCode localStatus = status; + LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus)); + ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus); + if (U_SUCCESS(localStatus) && ures_getSize(bundle.getAlias())>2) { // the length is 3 if more data is present + ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus); + int32_t dummy; + result.exists = true; + result.pattern = ures_getStringByIndex(bundle.getAlias(), 0, &dummy, &localStatus); + result.decimalSeparator = ures_getStringByIndex(bundle.getAlias(), 1, &dummy, &localStatus); + result.groupingSeparator = ures_getStringByIndex(bundle.getAlias(), 2, &dummy, &localStatus); + status = localStatus; + } else if (localStatus != U_MISSING_RESOURCE_ERROR) { + status = localStatus; + } + return result; +} + inline bool unitIsCurrency(const MeasureUnit &unit) { return uprv_strcmp("currency", unit.getType()) == 0; } @@ -161,8 +194,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, bool isPercent = isNoUnit && unitIsPercent(macros.unit); bool isPermille = isNoUnit && unitIsPermille(macros.unit); bool isCldrUnit = !isCurrency && !isNoUnit; - bool isAccounting = - macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS; + bool isAccounting = macros.sign == UNUM_SIGN_ACCOUNTING + || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS + || macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; CurrencyUnit currency(kDefaultCurrency, status); if (isCurrency) { currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit @@ -185,20 +219,51 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, } const char *nsName = U_SUCCESS(status) ? 
ns->getName() : "latn"; - // Load and parse the pattern string. It is used for grouping sizes and affixes only. - CldrPatternStyle patternStyle; - if (isPercent || isPermille) { - patternStyle = CLDR_PATTERN_STYLE_PERCENT; - } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { - patternStyle = CLDR_PATTERN_STYLE_DECIMAL; - } else if (isAccounting) { - // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now, - // the API contract allows us to add support to other units in the future. - patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING; + // Resolve the symbols. Do this here because currency may need to customize them. + if (macros.symbols.isDecimalFormatSymbols()) { + fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); } else { - patternStyle = CLDR_PATTERN_STYLE_CURRENCY; + fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status); + // Give ownership to the NumberFormatterImpl. + fSymbols.adoptInstead(fMicros.symbols); + } + + // Load and parse the pattern string. It is used for grouping sizes and affixes only. + // If we are formatting currency, check for a currency-specific pattern. + const char16_t* pattern = nullptr; + if (isCurrency) { + CurrencyFormatInfoResult info = getCurrencyFormatInfo(macros.locale, currency.getSubtype(), status); + if (info.exists) { + pattern = info.pattern; + // It's clunky to clone an object here, but this code is not frequently executed. 
+ DecimalFormatSymbols* symbols = new DecimalFormatSymbols(*fMicros.symbols); + fMicros.symbols = symbols; + fSymbols.adoptInstead(symbols); + symbols->setSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol, + UnicodeString(info.decimalSeparator), + FALSE); + symbols->setSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol, + UnicodeString(info.groupingSeparator), + FALSE); + } + } + if (pattern == nullptr) { + CldrPatternStyle patternStyle; + if (isPercent || isPermille) { + patternStyle = CLDR_PATTERN_STYLE_PERCENT; + } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { + patternStyle = CLDR_PATTERN_STYLE_DECIMAL; + } else if (isAccounting) { + // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now, + // the API contract allows us to add support to other units in the future. + patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING; + } else { + patternStyle = CLDR_PATTERN_STYLE_CURRENCY; + } + pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status); } - const char16_t *pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status); auto patternInfo = new ParsedPatternInfo(); fPatternInfo.adoptInstead(patternInfo); PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status); @@ -207,15 +272,6 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR /// ///////////////////////////////////////////////////////////////////////////////////// - // Symbols - if (macros.symbols.isDecimalFormatSymbols()) { - fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); - } else { - fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status); - // Give ownership to the NumberFormatterImpl. 
- fSymbols.adoptInstead(fMicros.symbols); - } - // Rounding strategy if (!macros.rounder.isBogus()) { fMicros.rounding = macros.rounder; @@ -233,11 +289,11 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, fMicros.grouping = macros.grouper; } else if (macros.notation.fType == Notation::NTN_COMPACT) { // Compact notation uses minGrouping by default since ICU 59 - fMicros.grouping = Grouper::minTwoDigits(); + fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_MIN2); } else { - fMicros.grouping = Grouper::defaults(); + fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_AUTO); } - fMicros.grouping.setLocaleData(*fPatternInfo); + fMicros.grouping.setLocaleData(*fPatternInfo, macros.locale); // Padding strategy if (!macros.padder.isBogus()) { @@ -308,6 +364,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, LongNameHandler::forMeasureUnit( macros.locale, macros.unit, + macros.perUnit, unitWidth, resolvePluralRules(macros.rules, macros.locale, status), chain, diff --git a/deps/icu-small/source/i18n/number_grouping.cpp b/deps/icu-small/source/i18n/number_grouping.cpp index 15362825cc..a2b1bbd6b3 100644 --- a/deps/icu-small/source/i18n/number_grouping.cpp +++ b/deps/icu-small/source/i18n/number_grouping.cpp @@ -7,36 +7,70 @@ #include "unicode/numberformatter.h" #include "number_patternstring.h" +#include "uresimp.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; -Grouper Grouper::defaults() { - return {-2, -2, false}; +namespace { + +int16_t getMinGroupingForLocale(const Locale& locale) { + // TODO: Cache this? + UErrorCode localStatus = U_ZERO_ERROR; + LocalUResourceBundlePointer bundle(ures_open(NULL, locale.getName(), &localStatus)); + int32_t resultLen = 0; + const char16_t* result = ures_getStringByKeyWithFallback( + bundle.getAlias(), + "NumberElements/minimumGroupingDigits", + &resultLen, + &localStatus); + // TODO: Is it safe to assume resultLen == 1? 
Would locales set minGrouping >= 10? + if (U_FAILURE(localStatus) || resultLen != 1) { + return 1; + } + return result[0] - u'0'; } -Grouper Grouper::minTwoDigits() { - return {-2, -2, true}; } -Grouper Grouper::none() { - return {-1, -1, false}; +Grouper Grouper::forStrategy(UGroupingStrategy grouping) { + switch (grouping) { + case UNUM_GROUPING_OFF: + return {-1, -1, -2}; + case UNUM_GROUPING_AUTO: + return {-2, -2, -2}; + case UNUM_GROUPING_MIN2: + return {-2, -2, -3}; + case UNUM_GROUPING_ON_ALIGNED: + return {-4, -4, 1}; + case UNUM_GROUPING_THOUSANDS: + return {3, 3, 1}; + default: + U_ASSERT(FALSE); + } } -void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo) { - if (fGrouping1 != -2) { +void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Locale& locale) { + if (fGrouping1 != -2 && fGrouping2 != -4) { return; } - auto grouping1 = static_cast<int8_t> (patternInfo.positive.groupingSizes & 0xffff); - auto grouping2 = static_cast<int8_t> ((patternInfo.positive.groupingSizes >> 16) & 0xffff); - auto grouping3 = static_cast<int8_t> ((patternInfo.positive.groupingSizes >> 32) & 0xffff); + auto grouping1 = static_cast<int16_t> (patternInfo.positive.groupingSizes & 0xffff); + auto grouping2 = static_cast<int16_t> ((patternInfo.positive.groupingSizes >> 16) & 0xffff); + auto grouping3 = static_cast<int16_t> ((patternInfo.positive.groupingSizes >> 32) & 0xffff); if (grouping2 == -1) { - grouping1 = -1; + grouping1 = fGrouping1 == -4 ? 
(short) 3 : (short) -1; } if (grouping3 == -1) { grouping2 = grouping1; } + if (fMinGrouping == -2) { + fMinGrouping = getMinGroupingForLocale(locale); + } else if (fMinGrouping == -3) { + fMinGrouping = uprv_max(2, getMinGroupingForLocale(locale)); + } else { + // leave fMinGrouping alone + } fGrouping1 = grouping1; fGrouping2 = grouping2; } @@ -49,7 +83,7 @@ bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &val } position -= fGrouping1; return position >= 0 && (position % fGrouping2) == 0 - && value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= (fMin2 ? 2 : 1); + && value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= fMinGrouping; } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/number_integerwidth.cpp b/deps/icu-small/source/i18n/number_integerwidth.cpp index 10dacfc4ac..4a612273f5 100644 --- a/deps/icu-small/source/i18n/number_integerwidth.cpp +++ b/deps/icu-small/source/i18n/number_integerwidth.cpp @@ -13,25 +13,28 @@ using namespace icu; using namespace icu::number; using namespace icu::number::impl; -IntegerWidth::IntegerWidth(int8_t minInt, int8_t maxInt) { +IntegerWidth::IntegerWidth(digits_t minInt, digits_t maxInt) { fUnion.minMaxInt.fMinInt = minInt; fUnion.minMaxInt.fMaxInt = maxInt; } IntegerWidth IntegerWidth::zeroFillTo(int32_t minInt) { if (minInt >= 0 && minInt <= kMaxIntFracSig) { - return {static_cast<int8_t>(minInt), -1}; + return {static_cast<digits_t>(minInt), -1}; } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) { if (fHasError) { return *this; } // No-op on error - if (maxInt >= 0 && maxInt <= kMaxIntFracSig) { - return {fUnion.minMaxInt.fMinInt, static_cast<int8_t>(maxInt)}; + digits_t minInt = fUnion.minMaxInt.fMinInt; + if (maxInt >= 0 && maxInt <= kMaxIntFracSig && minInt <= maxInt) { + return {minInt, static_cast<digits_t>(maxInt)}; + } else if (maxInt == -1) { + 
return {minInt, -1}; } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } diff --git a/deps/icu-small/source/i18n/number_longnames.cpp b/deps/icu-small/source/i18n/number_longnames.cpp index 88b3413585..5c363442e7 100644 --- a/deps/icu-small/source/i18n/number_longnames.cpp +++ b/deps/icu-small/source/i18n/number_longnames.cpp @@ -5,6 +5,7 @@ #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#include "unicode/simpleformatter.h" #include "unicode/ures.h" #include "ureslocs.h" #include "charstr.h" @@ -19,6 +20,37 @@ using namespace icu::number::impl; namespace { +constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; +constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; +constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 2; + +static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { + // pluralKeyword can also be "dnam" or "per" + if (uprv_strcmp(pluralKeyword, "dnam") == 0) { + return DNAM_INDEX; + } else if (uprv_strcmp(pluralKeyword, "per") == 0) { + return PER_INDEX; + } else { + StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); + return plural; + } +} + +static UnicodeString getWithPlural( + const UnicodeString* strings, + int32_t plural, + UErrorCode& status) { + UnicodeString result = strings[plural]; + if (result.isBogus()) { + result = strings[StandardPlural::Form::OTHER]; + } + if (result.isBogus()) { + // There should always be data in the "other" plural variant. + status = U_INTERNAL_PROGRAM_ERROR; + } + return result; +} + ////////////////////////// /// BEGIN DATA LOADING /// @@ -28,7 +60,7 @@ class PluralTableSink : public ResourceSink { public: explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { // Initialize the array to bogus strings. 
- for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + for (int32_t i = 0; i < ARRAY_LENGTH; i++) { outArray[i].setToBogus(); } } @@ -36,17 +68,13 @@ class PluralTableSink : public ResourceSink { void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { ResourceTable pluralsTable = value.getTable(status); if (U_FAILURE(status)) { return; } - for (int i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { - // In MeasureUnit data, ignore dnam and per units for now. - if (uprv_strcmp(key, "dnam") == 0 || uprv_strcmp(key, "per") == 0) { - continue; - } - StandardPlural::Form plural = StandardPlural::fromString(key, status); + for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { + int32_t index = getIndex(key, status); if (U_FAILURE(status)) { return; } - if (!outArray[plural].isBogus()) { + if (!outArray[index].isBogus()) { continue; } - outArray[plural] = value.getUnicodeString(status); + outArray[index] = value.getUnicodeString(status); if (U_FAILURE(status)) { return; } } } @@ -105,6 +133,22 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, } } +UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &width, UErrorCode& status) { + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return {}; } + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/compound/per", status); + int32_t len = 0; + const UChar* ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + return UnicodeString(ptr, len); +} + //////////////////////// /// END DATA LOADING /// //////////////////////// @@ -112,11 +156,24 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, } // 
namespace LongNameHandler -LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width, - const PluralRules *rules, const MicroPropsGenerator *parent, - UErrorCode &status) { +LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status) { + MeasureUnit unit = unitRef; + if (uprv_strcmp(perUnit.getType(), "none") != 0) { + // Compound unit: first try to simplify (e.g., meters per second is its own unit). + bool isResolved = false; + MeasureUnit resolved = MeasureUnit::resolveUnitPerUnit(unit, perUnit, &isResolved); + if (isResolved) { + unit = resolved; + } else { + // No simplified form is available. + return forCompoundUnit(loc, unit, perUnit, width, rules, parent, status); + } + } + LongNameHandler result(rules, parent); - UnicodeString simpleFormats[StandardPlural::Form::COUNT]; + UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unit, width, simpleFormats, status); if (U_FAILURE(status)) { return result; } // TODO: What field to use for units? 
@@ -124,12 +181,47 @@ LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unit, cons return result; } +LongNameHandler +LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status) { + LongNameHandler result(rules, parent); + UnicodeString primaryData[ARRAY_LENGTH]; + getMeasureData(loc, unit, width, primaryData, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryData[ARRAY_LENGTH]; + getMeasureData(loc, perUnit, width, secondaryData, status); + if (U_FAILURE(status)) { return result; } + + UnicodeString perUnitFormat; + if (!secondaryData[PER_INDEX].isBogus()) { + perUnitFormat = secondaryData[PER_INDEX]; + } else { + UnicodeString rawPerUnitFormat = getPerUnitFormat(loc, width, status); + if (U_FAILURE(status)) { return result; } + // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. + SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); + if (U_FAILURE(status)) { return result; } + SimpleFormatter secondaryCompiled(secondaryFormat, 1, 1, status); + if (U_FAILURE(status)) { return result; } + UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); + // TODO: Why does UnicodeString need to be explicit in the following line? + compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); + if (U_FAILURE(status)) { return result; } + } + // TODO: What field to use for units? 
+ multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_FIELD_COUNT, result.fModifiers, status); + return result; +} + LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { LongNameHandler result(rules, parent); - UnicodeString simpleFormats[StandardPlural::Form::COUNT]; + UnicodeString simpleFormats[ARRAY_LENGTH]; getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return result; } simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, result.fModifiers, status); @@ -139,20 +231,30 @@ LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const C void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, SimpleModifier *output, UErrorCode &status) { for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { - UnicodeString simpleFormat = simpleFormats[i]; - if (simpleFormat.isBogus()) { - simpleFormat = simpleFormats[StandardPlural::Form::OTHER]; - } - if (simpleFormat.isBogus()) { - // There should always be data in the "other" plural variant. 
- status = U_INTERNAL_PROGRAM_ERROR; - return; - } - SimpleFormatter compiledFormatter(simpleFormat, 1, 1, status); + UnicodeString simpleFormat = getWithPlural(simpleFormats, i, status); + if (U_FAILURE(status)) { return; } + SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); + if (U_FAILURE(status)) { return; } output[i] = SimpleModifier(compiledFormatter, field, false); } } +void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, + Field field, SimpleModifier *output, UErrorCode &status) { + SimpleFormatter trailCompiled(trailFormat, 1, 1, status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { + UnicodeString leadFormat = getWithPlural(leadFormats, i, status); + if (U_FAILURE(status)) { return; } + UnicodeString compoundFormat; + trailCompiled.format(leadFormat, compoundFormat, status); + if (U_FAILURE(status)) { return; } + SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); + if (U_FAILURE(status)) { return; } + output[i] = SimpleModifier(compoundCompiled, field, false); + } +} + void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const { parent->processQuantity(quantity, micros, status); diff --git a/deps/icu-small/source/i18n/number_longnames.h b/deps/icu-small/source/i18n/number_longnames.h index 22ecbac30e..8738bb99e7 100644 --- a/deps/icu-small/source/i18n/number_longnames.h +++ b/deps/icu-small/source/i18n/number_longnames.h @@ -21,8 +21,9 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory { const MicroPropsGenerator *parent, UErrorCode &status); static LongNameHandler - forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const UNumberUnitWidth &width, - const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); + forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const UNumberUnitWidth 
&width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status); void processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const U_OVERRIDE; @@ -35,8 +36,15 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory { LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) : rules(rules), parent(parent) {} + static LongNameHandler + forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, + const UNumberUnitWidth &width, const PluralRules *rules, + const MicroPropsGenerator *parent, UErrorCode &status); + static void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, SimpleModifier *output, UErrorCode &status); + static void multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, + Field field, SimpleModifier *output, UErrorCode &status); }; } // namespace impl diff --git a/deps/icu-small/source/i18n/number_modifiers.cpp b/deps/icu-small/source/i18n/number_modifiers.cpp index a19b12d11e..872b97010d 100644 --- a/deps/icu-small/source/i18n/number_modifiers.cpp +++ b/deps/icu-small/source/i18n/number_modifiers.cpp @@ -74,19 +74,29 @@ bool ConstantAffixModifier::isStrong() const { SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) { - U_ASSERT(1 == - SimpleFormatter::getArgumentLimit(fCompiledPattern.getBuffer(), fCompiledPattern.length())); - if (fCompiledPattern.charAt(1) != 0) { + int32_t argLimit = SimpleFormatter::getArgumentLimit( + fCompiledPattern.getBuffer(), fCompiledPattern.length()); + if (argLimit == 0) { + // No arguments in compiled pattern fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; - fSuffixOffset = 3 + fPrefixLength; - } else { - fPrefixLength = 0; - fSuffixOffset = 2; - } - if (3 + fPrefixLength < fCompiledPattern.length()) { - 
fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; - } else { + U_ASSERT(2 + fPrefixLength == fCompiledPattern.length()); + // Set suffixOffset = -1 to indicate no arguments in compiled pattern. + fSuffixOffset = -1; fSuffixLength = 0; + } else { + U_ASSERT(argLimit == 1); + if (fCompiledPattern.charAt(1) != 0) { + fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; + fSuffixOffset = 3 + fPrefixLength; + } else { + fPrefixLength = 0; + fSuffixOffset = 2; + } + if (3 + fPrefixLength < fCompiledPattern.length()) { + fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; + } else { + fSuffixLength = 0; + } } } @@ -123,26 +133,37 @@ bool SimpleModifier::isStrong() const { int32_t SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, Field field, UErrorCode &status) const { - if (fPrefixLength > 0) { - result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); - } - if (fSuffixLength > 0) { - result.insert( - endIndex + fPrefixLength, - fCompiledPattern, - 1 + fSuffixOffset, - 1 + fSuffixOffset + fSuffixLength, - field, - status); + if (fSuffixOffset == -1) { + // There is no argument for the inner number; overwrite the entire segment with our string. 
+ return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); + } else { + if (fPrefixLength > 0) { + result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); + } + if (fSuffixLength > 0) { + result.insert( + endIndex + fPrefixLength, + fCompiledPattern, + 1 + fSuffixOffset, + 1 + fSuffixOffset + fSuffixLength, + field, + status); + } + return fPrefixLength + fSuffixLength; } - return fPrefixLength + fSuffixLength; } int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const { - // Insert the suffix first since inserting the prefix will change the rightIndex - int32_t length = output.insert(rightIndex, fSuffix, status); - length += output.insert(leftIndex, fPrefix, status); + int32_t length = output.insert(leftIndex, fPrefix, status); + if (fOverwrite) { + length += output.splice( + leftIndex + length, + rightIndex + length, + UnicodeString(), 0, 0, + UNUM_FIELD_COUNT, status); + } + length += output.insert(rightIndex + length, fSuffix, status); return length; } @@ -162,10 +183,11 @@ bool ConstantMultiFieldModifier::isStrong() const { CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, + bool overwrite, bool strong, const DecimalFormatSymbols &symbols, UErrorCode &status) - : ConstantMultiFieldModifier(prefix, suffix, strong) { + : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) { // Check for currency spacing. Do not build the UnicodeSets unless there is // a currency code point at a boundary. 
if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) { diff --git a/deps/icu-small/source/i18n/number_modifiers.h b/deps/icu-small/source/i18n/number_modifiers.h index 6a88828a44..4762a6f6d3 100644 --- a/deps/icu-small/source/i18n/number_modifiers.h +++ b/deps/icu-small/source/i18n/number_modifiers.h @@ -103,8 +103,15 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory { */ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { public: - ConstantMultiFieldModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, - bool strong) : fPrefix(prefix), fSuffix(suffix), fStrong(strong) {} + ConstantMultiFieldModifier( + const NumberStringBuilder &prefix, + const NumberStringBuilder &suffix, + bool overwrite, + bool strong) + : fPrefix(prefix), + fSuffix(suffix), + fOverwrite(overwrite), + fStrong(strong) {} int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; @@ -120,6 +127,7 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { // value and is treated internally as immutable. 
NumberStringBuilder fPrefix; NumberStringBuilder fSuffix; + bool fOverwrite; bool fStrong; }; @@ -127,8 +135,13 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { class U_I18N_API CurrencySpacingEnabledModifier : public ConstantMultiFieldModifier { public: /** Safe code path */ - CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, - bool strong, const DecimalFormatSymbols &symbols, UErrorCode &status); + CurrencySpacingEnabledModifier( + const NumberStringBuilder &prefix, + const NumberStringBuilder &suffix, + bool overwrite, + bool strong, + const DecimalFormatSymbols &symbols, + UErrorCode &status); int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; @@ -216,31 +229,33 @@ class U_I18N_API ParameterizedModifier : public UMemory { } } - void adoptPositiveNegativeModifiers(const Modifier *positive, const Modifier *negative) { - mods[0] = positive; - mods[1] = negative; + void adoptPositiveNegativeModifiers( + const Modifier *positive, const Modifier *zero, const Modifier *negative) { + mods[2] = positive; + mods[1] = zero; + mods[0] = negative; } /** The modifier is ADOPTED. */ - void adoptSignPluralModifier(bool isNegative, StandardPlural::Form plural, const Modifier *mod) { - mods[getModIndex(isNegative, plural)] = mod; + void adoptSignPluralModifier(int8_t signum, StandardPlural::Form plural, const Modifier *mod) { + mods[getModIndex(signum, plural)] = mod; } /** Returns a reference to the modifier; no ownership change. */ - const Modifier *getModifier(bool isNegative) const { - return mods[isNegative ? 1 : 0]; + const Modifier *getModifier(int8_t signum) const { + return mods[signum + 1]; } /** Returns a reference to the modifier; no ownership change. 
*/ - const Modifier *getModifier(bool isNegative, StandardPlural::Form plural) const { - return mods[getModIndex(isNegative, plural)]; + const Modifier *getModifier(int8_t signum, StandardPlural::Form plural) const { + return mods[getModIndex(signum, plural)]; } private: - const Modifier *mods[2 * StandardPlural::COUNT]; + const Modifier *mods[3 * StandardPlural::COUNT]; - inline static int32_t getModIndex(bool isNegative, StandardPlural::Form plural) { - return static_cast<int32_t>(plural) * 2 + (isNegative ? 1 : 0); + inline static int32_t getModIndex(int8_t signum, StandardPlural::Form plural) { + return static_cast<int32_t>(plural) * 3 + (signum + 1); } }; diff --git a/deps/icu-small/source/i18n/number_notation.cpp b/deps/icu-small/source/i18n/number_notation.cpp index ff0cd9505d..f4ad333354 100644 --- a/deps/icu-small/source/i18n/number_notation.cpp +++ b/deps/icu-small/source/i18n/number_notation.cpp @@ -54,13 +54,13 @@ Notation Notation::simple() { ScientificNotation ScientificNotation::withMinExponentDigits(int32_t minExponentDigits) const { - if (minExponentDigits >= 0 && minExponentDigits < kMaxIntFracSig) { + if (minExponentDigits >= 1 && minExponentDigits <= kMaxIntFracSig) { ScientificSettings settings = fUnion.scientific; - settings.fMinExponentDigits = (int8_t) minExponentDigits; + settings.fMinExponentDigits = static_cast<digits_t>(minExponentDigits); NotationUnion union_ = {settings}; return {NTN_SCIENTIFIC, union_}; } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } diff --git a/deps/icu-small/source/i18n/number_padding.cpp b/deps/icu-small/source/i18n/number_padding.cpp index a478af6054..b1db3490cd 100644 --- a/deps/icu-small/source/i18n/number_padding.cpp +++ b/deps/icu-small/source/i18n/number_padding.cpp @@ -43,7 +43,7 @@ Padder Padder::codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosit if (targetWidth >= 0) { return {cp, targetWidth, position}; } else { - return 
{U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } diff --git a/deps/icu-small/source/i18n/number_patternmodifier.cpp b/deps/icu-small/source/i18n/number_patternmodifier.cpp index 0599f92a4f..e182104c91 100644 --- a/deps/icu-small/source/i18n/number_patternmodifier.cpp +++ b/deps/icu-small/source/i18n/number_patternmodifier.cpp @@ -38,8 +38,8 @@ MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const Cu this->rules = rules; } -void MutablePatternModifier::setNumberProperties(bool isNegative, StandardPlural::Form plural) { - this->isNegative = isNegative; +void MutablePatternModifier::setNumberProperties(int8_t signum, StandardPlural::Form plural) { + this->signum = signum; this->plural = plural; } @@ -74,10 +74,12 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren if (needsPlurals()) { // Slower path when we require the plural keyword. for (StandardPlural::Form plural : STANDARD_PLURAL_VALUES) { - setNumberProperties(false, plural); - pm->adoptSignPluralModifier(false, plural, createConstantModifier(status)); - setNumberProperties(true, plural); - pm->adoptSignPluralModifier(true, plural, createConstantModifier(status)); + setNumberProperties(1, plural); + pm->adoptSignPluralModifier(1, plural, createConstantModifier(status)); + setNumberProperties(0, plural); + pm->adoptSignPluralModifier(0, plural, createConstantModifier(status)); + setNumberProperties(-1, plural); + pm->adoptSignPluralModifier(-1, plural, createConstantModifier(status)); } if (U_FAILURE(status)) { delete pm; @@ -86,11 +88,13 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren return new ImmutablePatternModifier(pm, rules, parent); // adopts pm } else { // Faster path when plural keyword is not needed. 
- setNumberProperties(false, StandardPlural::Form::COUNT); + setNumberProperties(1, StandardPlural::Form::COUNT); Modifier *positive = createConstantModifier(status); - setNumberProperties(true, StandardPlural::Form::COUNT); + setNumberProperties(0, StandardPlural::Form::COUNT); + Modifier *zero = createConstantModifier(status); + setNumberProperties(-1, StandardPlural::Form::COUNT); Modifier *negative = createConstantModifier(status); - pm->adoptPositiveNegativeModifiers(positive, negative); + pm->adoptPositiveNegativeModifiers(positive, zero, negative); if (U_FAILURE(status)) { delete pm; return nullptr; @@ -105,9 +109,9 @@ ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErro insertPrefix(a, 0, status); insertSuffix(b, 0, status); if (patternInfo->hasCurrencySign()) { - return new CurrencySpacingEnabledModifier(a, b, fStrong, *symbols, status); + return new CurrencySpacingEnabledModifier(a, b, !patternInfo->hasBody(), fStrong, *symbols, status); } else { - return new ConstantMultiFieldModifier(a, b, fStrong); + return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong); } } @@ -123,13 +127,13 @@ void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroP void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const { if (rules == nullptr) { - micros.modMiddle = pm->getModifier(quantity.isNegative()); + micros.modMiddle = pm->getModifier(quantity.signum()); } else { // TODO: Fix this. Avoid the copy. DecimalQuantity copy(quantity); copy.roundToInfinity(); StandardPlural::Form plural = copy.getStandardPlural(rules); - micros.modMiddle = pm->getModifier(quantity.isNegative(), plural); + micros.modMiddle = pm->getModifier(quantity.signum(), plural); } } @@ -149,9 +153,9 @@ void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &mi // TODO: Fix this. Avoid the copy. 
DecimalQuantity copy(fq); micros.rounding.apply(copy, status); - nonConstThis->setNumberProperties(fq.isNegative(), copy.getStandardPlural(rules)); + nonConstThis->setNumberProperties(fq.signum(), copy.getStandardPlural(rules)); } else { - nonConstThis->setNumberProperties(fq.isNegative(), StandardPlural::Form::COUNT); + nonConstThis->setNumberProperties(fq.signum(), StandardPlural::Form::COUNT); } micros.modMiddle = this; } @@ -163,9 +167,23 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftI auto nonConstThis = const_cast<MutablePatternModifier *>(this); int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status); int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status); + // If the pattern had no decimal stem body (like #,##0.00), overwrite the value. + int32_t overwriteLen = 0; + if (!patternInfo->hasBody()) { + overwriteLen = output.splice( + leftIndex + prefixLen, rightIndex + prefixLen, + UnicodeString(), 0, 0, UNUM_FIELD_COUNT, + status); + } CurrencySpacingEnabledModifier::applyCurrencySpacing( - output, leftIndex, prefixLen, rightIndex + prefixLen, suffixLen, *symbols, status); - return prefixLen + suffixLen; + output, + leftIndex, + prefixLen, + rightIndex + overwriteLen + prefixLen, + suffixLen, + *symbols, + status); + return prefixLen + overwriteLen + suffixLen; } int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const { @@ -230,13 +248,16 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { } else if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) { return UnicodeString(); } else { + UCurrNameStyle selector = (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW) + ? 
UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME + : UCurrNameStyle::UCURR_SYMBOL_NAME; UErrorCode status = U_ZERO_ERROR; UBool isChoiceFormat = FALSE; int32_t symbolLen = 0; const char16_t *symbol = ucurr_getName( currencyCode, symbols->getLocale().getName(), - UCurrNameStyle::UCURR_SYMBOL_NAME, + selector, &isChoiceFormat, &symbolLen, &status); @@ -278,14 +299,17 @@ void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) { inCharSequenceMode = true; // Should the output render '+' where '-' would normally appear in the pattern? - plusReplacesMinusSign = !isNegative && ( - signDisplay == UNUM_SIGN_ALWAYS || - signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS) && - patternInfo->positiveHasPlusSign() == false; + plusReplacesMinusSign = signum != -1 + && (signDisplay == UNUM_SIGN_ALWAYS + || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS + || (signum == 1 + && (signDisplay == UNUM_SIGN_EXCEPT_ZERO + || signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) + && patternInfo->positiveHasPlusSign() == false; // Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.) bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && ( - isNegative || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign)); + signum == -1 || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign)); // Resolve the flags for the affix pattern. fFlags = 0; @@ -303,7 +327,7 @@ void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) { // Should we prepend a sign to the pattern? 
if (!isPrefix || useNegativeAffixPattern) { prependSign = false; - } else if (isNegative) { + } else if (signum == -1) { prependSign = signDisplay != UNUM_SIGN_NEVER; } else { prependSign = plusReplacesMinusSign; diff --git a/deps/icu-small/source/i18n/number_patternmodifier.h b/deps/icu-small/source/i18n/number_patternmodifier.h index 705037f0ba..9c8b95f776 100644 --- a/deps/icu-small/source/i18n/number_patternmodifier.h +++ b/deps/icu-small/source/i18n/number_patternmodifier.h @@ -18,8 +18,8 @@ U_NAMESPACE_BEGIN // Export an explicit template instantiation of the LocalPointer that is used as a // data member of ParameterizedModifier. -// (MSVC requires this, even though it should not be necessary.) -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= #pragma warning(suppress: 4661) template class U_I18N_API LocalPointerBase<number::impl::ParameterizedModifier>; @@ -125,13 +125,13 @@ class U_I18N_API MutablePatternModifier /** * Sets attributes of the current number being processed. * - * @param isNegative - * Whether the number is negative. + * @param signum + * -1 if negative; +1 if positive; or 0 if zero. * @param plural - * The plural form of the number, required only if the pattern contains the triple currency sign, "¤¤¤" - * (and as indicated by {@link #needsPlurals()}). + * The plural form of the number, required only if the pattern contains the triple + * currency sign, "¤¤¤" (and as indicated by {@link #needsPlurals()}). */ - void setNumberProperties(bool isNegative, StandardPlural::Form plural); + void setNumberProperties(int8_t signum, StandardPlural::Form plural); /** * Returns true if the pattern represented by this MurkyModifier requires a plural keyword in order to localize. 
@@ -211,7 +211,7 @@ class U_I18N_API MutablePatternModifier const PluralRules *rules; // Number details (initialized in setNumberProperties) - bool isNegative; + int8_t signum; StandardPlural::Form plural; // QuantityChain details (initialized in addToChain) diff --git a/deps/icu-small/source/i18n/number_patternstring.cpp b/deps/icu-small/source/i18n/number_patternstring.cpp index c67e354181..20178824b0 100644 --- a/deps/icu-small/source/i18n/number_patternstring.cpp +++ b/deps/icu-small/source/i18n/number_patternstring.cpp @@ -95,6 +95,10 @@ bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &st return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status); } +bool ParsedPatternInfo::hasBody() const { + return positive.integerTotal > 0; +} + ///////////////////////////////////////////////////// /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// ///////////////////////////////////////////////////// diff --git a/deps/icu-small/source/i18n/number_patternstring.h b/deps/icu-small/source/i18n/number_patternstring.h index 6e1bb7f44d..ec44290d66 100644 --- a/deps/icu-small/source/i18n/number_patternstring.h +++ b/deps/icu-small/source/i18n/number_patternstring.h @@ -84,6 +84,8 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor bool containsSymbolType(AffixPatternType type, UErrorCode &status) const U_OVERRIDE; + bool hasBody() const U_OVERRIDE; + private: struct U_I18N_API ParserState { const UnicodeString &pattern; // reference to the parent diff --git a/deps/icu-small/source/i18n/number_rounding.cpp b/deps/icu-small/source/i18n/number_rounding.cpp index 5c494f0954..fd4dafdf98 100644 --- a/deps/icu-small/source/i18n/number_rounding.cpp +++ b/deps/icu-small/source/i18n/number_rounding.cpp @@ -58,7 +58,7 @@ FractionRounder Rounder::fixedFraction(int32_t minMaxFractionPlaces) { if (minMaxFractionPlaces >= 0 && minMaxFractionPlaces <= kMaxIntFracSig) { return 
constructFraction(minMaxFractionPlaces, minMaxFractionPlaces); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -66,7 +66,7 @@ FractionRounder Rounder::minFraction(int32_t minFractionPlaces) { if (minFractionPlaces >= 0 && minFractionPlaces <= kMaxIntFracSig) { return constructFraction(minFractionPlaces, -1); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -74,7 +74,7 @@ FractionRounder Rounder::maxFraction(int32_t maxFractionPlaces) { if (maxFractionPlaces >= 0 && maxFractionPlaces <= kMaxIntFracSig) { return constructFraction(0, maxFractionPlaces); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -83,40 +83,40 @@ FractionRounder Rounder::minMaxFraction(int32_t minFractionPlaces, int32_t maxFr minFractionPlaces <= maxFractionPlaces) { return constructFraction(minFractionPlaces, maxFractionPlaces); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::fixedDigits(int32_t minMaxSignificantDigits) { - if (minMaxSignificantDigits >= 0 && minMaxSignificantDigits <= kMaxIntFracSig) { + if (minMaxSignificantDigits >= 1 && minMaxSignificantDigits <= kMaxIntFracSig) { return constructSignificant(minMaxSignificantDigits, minMaxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::minDigits(int32_t minSignificantDigits) { - if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) { + if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) { return constructSignificant(minSignificantDigits, -1); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::maxDigits(int32_t maxSignificantDigits) { - if (maxSignificantDigits >= 0 && 
maxSignificantDigits <= kMaxIntFracSig) { - return constructSignificant(0, maxSignificantDigits); + if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) { + return constructSignificant(1, maxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder Rounder::minMaxDigits(int32_t minSignificantDigits, int32_t maxSignificantDigits) { - if (minSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig && + if (minSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig && minSignificantDigits <= maxSignificantDigits) { return constructSignificant(minSignificantDigits, maxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -124,7 +124,7 @@ IncrementRounder Rounder::increment(double roundingIncrement) { if (roundingIncrement > 0.0) { return constructIncrement(roundingIncrement, 0); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -139,19 +139,19 @@ Rounder Rounder::withMode(RoundingMode roundingMode) const { Rounder FractionRounder::withMinDigits(int32_t minSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state - if (minSignificantDigits >= 0 && minSignificantDigits <= kMaxIntFracSig) { + if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) { return constructFractionSignificant(*this, minSignificantDigits, -1); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } Rounder FractionRounder::withMaxDigits(int32_t maxSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state - if (maxSignificantDigits >= 0 && maxSignificantDigits <= kMaxIntFracSig) { + if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) { return constructFractionSignificant(*this, -1, 
maxSignificantDigits); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } @@ -185,14 +185,14 @@ Rounder IncrementRounder::withMinFraction(int32_t minFrac) const { if (minFrac >= 0 && minFrac <= kMaxIntFracSig) { return constructIncrement(fUnion.increment.fIncrement, minFrac); } else { - return {U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR}; + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } } FractionRounder Rounder::constructFraction(int32_t minFrac, int32_t maxFrac) { FractionSignificantSettings settings; - settings.fMinFrac = static_cast<int8_t> (minFrac); - settings.fMaxFrac = static_cast<int8_t> (maxFrac); + settings.fMinFrac = static_cast<digits_t>(minFrac); + settings.fMaxFrac = static_cast<digits_t>(maxFrac); settings.fMinSig = -1; settings.fMaxSig = -1; RounderUnion union_; @@ -204,8 +204,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) { FractionSignificantSettings settings; settings.fMinFrac = -1; settings.fMaxFrac = -1; - settings.fMinSig = static_cast<int8_t>(minSig); - settings.fMaxSig = static_cast<int8_t>(maxSig); + settings.fMinSig = static_cast<digits_t>(minSig); + settings.fMaxSig = static_cast<digits_t>(maxSig); RounderUnion union_; union_.fracSig = settings; return {RND_SIGNIFICANT, union_, kDefaultMode}; @@ -214,8 +214,8 @@ Rounder Rounder::constructSignificant(int32_t minSig, int32_t maxSig) { Rounder Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSig, int32_t maxSig) { FractionSignificantSettings settings = base.fUnion.fracSig; - settings.fMinSig = static_cast<int8_t>(minSig); - settings.fMaxSig = static_cast<int8_t>(maxSig); + settings.fMinSig = static_cast<digits_t>(minSig); + settings.fMaxSig = static_cast<digits_t>(maxSig); RounderUnion union_; union_.fracSig = settings; return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode}; @@ -224,7 +224,7 @@ Rounder::constructFractionSignificant(const FractionRounder &base, int32_t minSi 
IncrementRounder Rounder::constructIncrement(double increment, int32_t minFrac) { IncrementSettings settings; settings.fIncrement = increment; - settings.fMinFrac = minFrac; + settings.fMinFrac = static_cast<digits_t>(minFrac); RounderUnion union_; union_.increment = settings; return {RND_INCREMENT, union_, kDefaultMode}; @@ -251,28 +251,39 @@ void Rounder::setLocaleData(const CurrencyUnit &currency, UErrorCode &status) { int32_t Rounder::chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer, UErrorCode &status) { - // TODO: Make a better and more efficient implementation. - // TODO: Avoid the object creation here. - DecimalQuantity copy(input); - + // Do not call this method with zero. U_ASSERT(!input.isZero()); - int32_t magnitude = input.getMagnitude(); - int32_t multiplier = producer.getMultiplier(magnitude); + + // Perform the first attempt at rounding. + int magnitude = input.getMagnitude(); + int multiplier = producer.getMultiplier(magnitude); input.adjustMagnitude(multiplier); apply(input, status); - // If the number turned to zero when rounding, do not re-attempt the rounding. - if (!input.isZero() && input.getMagnitude() == magnitude + multiplier + 1) { - magnitude += 1; - input = copy; - multiplier = producer.getMultiplier(magnitude); - input.adjustMagnitude(multiplier); - U_ASSERT(input.getMagnitude() == magnitude + multiplier - 1); - apply(input, status); - U_ASSERT(input.getMagnitude() == magnitude + multiplier); + // If the number rounded to zero, exit. + if (input.isZero() || U_FAILURE(status)) { + return multiplier; + } + + // If the new magnitude after rounding is the same as it was before rounding, then we are done. + // This case applies to most numbers. + if (input.getMagnitude() == magnitude + multiplier) { + return multiplier; } - return multiplier; + // If the above case DIDN'T apply, then we have a case like 99.9 -> 100 or 999.9 -> 1000: + // The number rounded up to the next magnitude. 
Check if the multiplier changes; if it doesn't, + // we do not need to make any more adjustments. + int _multiplier = producer.getMultiplier(magnitude + 1); + if (multiplier == _multiplier) { + return multiplier; + } + + // We have a case like 999.9 -> 1000, where the correct output is "1K", not "1000". + // Fix the magnitude and re-apply the rounding strategy. + input.adjustMagnitude(_multiplier - multiplier); + apply(input, status); + return _multiplier; } /** This is the method that contains the actual rounding logic. */ @@ -331,6 +342,7 @@ void Rounder::apply(impl::DecimalQuantity &value, UErrorCode& status) const { case RND_CURRENCY: // Call .withCurrency() before .apply()! U_ASSERT(false); + break; case RND_PASS_THROUGH: break; diff --git a/deps/icu-small/source/i18n/number_stringbuilder.cpp b/deps/icu-small/source/i18n/number_stringbuilder.cpp index e6e86bd429..37159d7e53 100644 --- a/deps/icu-small/source/i18n/number_stringbuilder.cpp +++ b/deps/icu-small/source/i18n/number_stringbuilder.cpp @@ -191,6 +191,30 @@ NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t return count; } +int32_t +NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, + int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { + int32_t thisLength = endThis - startThis; + int32_t otherLength = endOther - startOther; + int32_t count = otherLength - thisLength; + int32_t position; + if (count > 0) { + // Overall, chars need to be added. + position = prepareForInsert(startThis, count, status); + } else { + // Overall, chars need to be removed or kept the same. 
+ position = remove(startThis, -count); + } + if (U_FAILURE(status)) { + return count; + } + for (int32_t i = 0; i < otherLength; i++) { + getCharPtr()[position + i] = unistr.charAt(startOther + i); + getFieldPtr()[position + i] = field; + } + return count; +} + int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) { return insert(fLength, other, status); } @@ -296,6 +320,19 @@ int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count return fZero + index; } +int32_t NumberStringBuilder::remove(int32_t index, int32_t count) { + // TODO: Reset the heap here? (If the string after removal can fit on stack?) + int32_t position = index + fZero; + uprv_memmove2(getCharPtr() + position, + getCharPtr() + position + count, + sizeof(char16_t) * (fLength - index - count)); + uprv_memmove2(getFieldPtr() + position, + getFieldPtr() + position + count, + sizeof(Field) * (fLength - index - count)); + fLength -= count; + return position; +} + UnicodeString NumberStringBuilder::toUnicodeString() const { return UnicodeString(getCharPtr() + fZero, fLength); } diff --git a/deps/icu-small/source/i18n/number_stringbuilder.h b/deps/icu-small/source/i18n/number_stringbuilder.h index f08dcb1d1b..a97cc9ca02 100644 --- a/deps/icu-small/source/i18n/number_stringbuilder.h +++ b/deps/icu-small/source/i18n/number_stringbuilder.h @@ -77,6 +77,9 @@ class U_I18N_API NumberStringBuilder : public UMemory { int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field, UErrorCode &status); + int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, + int32_t startOther, int32_t endOther, Field field, UErrorCode& status); + int32_t append(const NumberStringBuilder &other, UErrorCode &status); int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status); @@ -123,6 +126,8 @@ class U_I18N_API NumberStringBuilder : public UMemory { int32_t prepareForInsert(int32_t 
index, int32_t count, UErrorCode &status); int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status); + + int32_t remove(int32_t index, int32_t count); }; } // namespace impl diff --git a/deps/icu-small/source/i18n/number_types.h b/deps/icu-small/source/i18n/number_types.h index 2bc21bd40d..c01765e2ce 100644 --- a/deps/icu-small/source/i18n/number_types.h +++ b/deps/icu-small/source/i18n/number_types.h @@ -31,7 +31,7 @@ typedef UNumberFormatPadPosition PadPosition; typedef UNumberCompactStyle CompactStyle; // ICU4J Equivalent: RoundingUtils.MAX_INT_FRAC_SIG -static constexpr int32_t kMaxIntFracSig = 100; +static constexpr int32_t kMaxIntFracSig = 999; // ICU4J Equivalent: RoundingUtils.DEFAULT_ROUNDING_MODE static constexpr RoundingMode kDefaultMode = RoundingMode::UNUM_FOUND_HALFEVEN; @@ -42,10 +42,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" "; // ICU4J Equivalent: NumberFormatterImpl.DEFAULT_CURRENCY static constexpr char16_t kDefaultCurrency[] = u"XXX"; -// FIXME: New error codes: -static constexpr UErrorCode U_NUMBER_DIGIT_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR; -static constexpr UErrorCode U_NUMBER_PADDING_WIDTH_OUTOFBOUNDS_ERROR = U_ILLEGAL_ARGUMENT_ERROR; - // Forward declarations: class Modifier; @@ -142,6 +138,13 @@ class U_I18N_API AffixPatternProvider { virtual bool negativeHasMinusSign() const = 0; virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0; + + /** + * True if the pattern has a number placeholder like "0" or "#,##0.00"; false if the pattern does not + * have one. This is used in cases like compact notation, where the pattern replaces the entire + * number instead of rendering the number. 
+ */ + virtual bool hasBody() const = 0; }; /** @@ -230,10 +233,21 @@ class U_I18N_API MicroPropsGenerator { virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0; }; +/** + * An interface used by compact notation and scientific notation to choose a multiplier while rounding. + */ class MultiplierProducer { public: virtual ~MultiplierProducer() = default; + /** + * Maps a magnitude to a multiplier in powers of ten. For example, in compact notation in English, a magnitude of 5 + * (e.g., 100,000) should return a multiplier of -3, since the number is displayed in thousands. + * + * @param magnitude + * The power of ten of the input number. + * @return The shift in powers of ten. + */ virtual int32_t getMultiplier(int32_t magnitude) const = 0; }; diff --git a/deps/icu-small/source/i18n/rbnf.cpp b/deps/icu-small/source/i18n/rbnf.cpp index 5b54e303f3..3385f300b1 100644 --- a/deps/icu-small/source/i18n/rbnf.cpp +++ b/deps/icu-small/source/i18n/rbnf.cpp @@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text, ParsePosition working_pp(0); Formattable working_result; - rp->parse(workingText, working_pp, kMaxDouble, working_result); + rp->parse(workingText, working_pp, kMaxDouble, 0, working_result); if (working_pp.getIndex() > high_pp.getIndex()) { high_pp = working_pp; high_result = working_result; diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp index 6cfa61f187..410ff9513b 100644 --- a/deps/icu-small/source/i18n/regexcmp.cpp +++ b/deps/icu-small/source/i18n/regexcmp.cpp @@ -4450,11 +4450,9 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB // See if the property looks like a Java "InBlockName", which // we will recast as "Block=BlockName" // - static const UChar IN[] = {0x49, 0x6E, 0}; // "In" - static const UChar BLOCK[] = {0x42, 0x6C, 0x6f, 0x63, 0x6b, 0x3d, 00}; // "Block=" - if (mPropName.startsWith(IN, 2) && 
propName.length()>=3) { + if (mPropName.startsWith(u"In", 2) && propName.length()>=3) { setExpr.truncate(4); // Leaves "[\p{", or "[\P{" - setExpr.append(BLOCK, -1); + setExpr.append(u"Block=", -1); setExpr.append(UnicodeString(mPropName, 2)); // Property with the leading "In" removed. setExpr.append(chRBrace); setExpr.append(chRBracket); diff --git a/deps/icu-small/source/i18n/rematch.cpp b/deps/icu-small/source/i18n/rematch.cpp index 1bdad18776..f252182207 100644 --- a/deps/icu-small/source/i18n/rematch.cpp +++ b/deps/icu-small/source/i18n/rematch.cpp @@ -5469,7 +5469,7 @@ GC_Done: if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { @@ -5546,7 +5546,7 @@ GC_Done: if (lbStartIdx < 0) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; - if (lbStartIdx > 0) { + if (lbStartIdx > 0 && lbStartIdx < fInputLength) { U16_SET_CP_START(inputBuf, 0, lbStartIdx); } } else { diff --git a/deps/icu-small/source/i18n/simpletz.cpp b/deps/icu-small/source/i18n/simpletz.cpp index 557b02620b..9bce8ed557 100644 --- a/deps/icu-small/source/i18n/simpletz.cpp +++ b/deps/icu-small/source/i18n/simpletz.cpp @@ -177,7 +177,7 @@ void SimpleTimeZone::construct(int32_t rawOffsetGMT, decodeRules(status); - if (savingsDST <= 0) { + if (savingsDST == 0) { status = U_ILLEGAL_ARGUMENT_ERROR; } } @@ -686,7 +686,7 @@ SimpleTimeZone::setRawOffset(int32_t offsetMillis) void SimpleTimeZone::setDSTSavings(int32_t millisSavedDuringDST, UErrorCode& status) { - if (millisSavedDuringDST <= 0) { + if (millisSavedDuringDST == 0) { status = U_ILLEGAL_ARGUMENT_ERROR; } else { diff --git a/deps/icu-small/source/i18n/ucol.cpp b/deps/icu-small/source/i18n/ucol.cpp index 34a394682f..7d3392da22 100644 --- a/deps/icu-small/source/i18n/ucol.cpp +++ b/deps/icu-small/source/i18n/ucol.cpp @@ -95,6 +95,7 @@ ucol_safeClone(const UCollator *coll, 
void * /*stackBuffer*/, int32_t * pBufferS Collator *newColl = Collator::fromUCollator(coll)->clone(); if (newColl == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } else { *status = U_SAFECLONE_ALLOCATED_WARNING; } diff --git a/deps/icu-small/source/i18n/ucol_res.cpp b/deps/icu-small/source/i18n/ucol_res.cpp index 0f1d6d23b1..76975ecc01 100644 --- a/deps/icu-small/source/i18n/ucol_res.cpp +++ b/deps/icu-small/source/i18n/ucol_res.cpp @@ -451,6 +451,7 @@ CollationLoader::loadFromData(UErrorCode &errorCode) { const CollationCacheEntry *entry = new CollationCacheEntry(validLocale, t.getAlias()); if(entry == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } else { t.orphan(); } diff --git a/deps/icu-small/source/i18n/udatpg.cpp b/deps/icu-small/source/i18n/udatpg.cpp index 9ba82b529c..febf73b3ce 100644 --- a/deps/icu-small/source/i18n/udatpg.cpp +++ b/deps/icu-small/source/i18n/udatpg.cpp @@ -181,6 +181,25 @@ udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg, return result.getBuffer(); } +U_CAPI int32_t U_EXPORT2 +udatpg_getFieldDisplayName(const UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + UDateTimePGDisplayWidth width, + UChar *fieldName, int32_t capacity, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) + return -1; + if (fieldName == NULL ? 
capacity != 0 : capacity < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + UnicodeString result = ((const DateTimePatternGenerator *)dtpg)->getFieldDisplayName(field,width); + if (fieldName == NULL) { + return result.length(); + } + return result.extract(fieldName, capacity, *pErrorCode); +} + U_CAPI void U_EXPORT2 udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg, const UChar *dtFormat, int32_t length) { diff --git a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h index 3fbe5da9ce..d682d2d0e7 100644 --- a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h +++ b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h @@ -16,7 +16,7 @@ #include "unicode/utypes.h" /** * \file - * \brief C++ API: Formats decimal numbers in compact form. + * \brief C++ API: Compatibility APIs for compact decimal number formatting. */ #if !UCONFIG_NO_FORMATTING @@ -30,6 +30,11 @@ U_NAMESPACE_BEGIN class PluralRules; /** + * <p><strong>IMPORTANT:</strong> New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. + * <hr/> + * * The CompactDecimalFormat produces abbreviated numbers, suitable for display in * environments will limited real estate. For example, 'Hits: 1.2B' instead of * 'Hits: 1,200,000,000'. The format will be appropriate for the given language, @@ -56,6 +61,9 @@ public: /** * Returns a compact decimal instance for specified locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @param style whether to use short or long style. * @param status error code returned here. 
diff --git a/deps/icu-small/source/i18n/unicode/datefmt.h b/deps/icu-small/source/i18n/unicode/datefmt.h index d70d8d1dd5..c895183931 100644 --- a/deps/icu-small/source/i18n/unicode/datefmt.h +++ b/deps/icu-small/source/i18n/unicode/datefmt.h @@ -44,7 +44,8 @@ class TimeZone; class DateTimePatternGenerator; // explicit template instantiation. see digitlst.h -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN template class U_I18N_API EnumSet<UDateFormatBooleanAttribute, 0, UDAT_BOOLEAN_ATTRIBUTE_COUNT>; diff --git a/deps/icu-small/source/i18n/unicode/dcfmtsym.h b/deps/icu-small/source/i18n/unicode/dcfmtsym.h index 4dc6f950f2..e58befa31b 100644 --- a/deps/icu-small/source/i18n/unicode/dcfmtsym.h +++ b/deps/icu-small/source/i18n/unicode/dcfmtsym.h @@ -80,10 +80,6 @@ U_NAMESPACE_BEGIN * If you supply a pattern with multiple grouping characters, the interval * between the last one and the end of the integer is the one that is * used. So "#,##,###,####" == "######,####" == "##,####,####". - * <P> - * This class only handles localized digits where the 10 digits are - * contiguous in Unicode, from 0 to 9. Other digits sets (such as - * superscripts) would need a different subclass. 
*/ class U_I18N_API DecimalFormatSymbols : public UObject { public: @@ -396,6 +392,13 @@ public: inline UBool isCustomIntlCurrencySymbol() const { return fIsCustomIntlCurrencySymbol; } + + /** + * @internal For ICU use only + */ + inline UChar32 getCodePointZero() const { + return fCodePointZero; + } #endif /* U_HIDE_INTERNAL_API */ /** @@ -410,10 +413,23 @@ public: * @return the format symbol by the param 'symbol' * @internal */ - inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const; + inline const UnicodeString& getConstSymbol(ENumberFormatSymbol symbol) const; #ifndef U_HIDE_INTERNAL_API /** + * Returns the const UnicodeString reference, like getConstSymbol, + * corresponding to the digit with the given value. This is equivalent + * to accessing the symbol from getConstSymbol with the corresponding + * key, such as kZeroDigitSymbol or kOneDigitSymbol. + * + * @param digit The digit, an integer between 0 and 9 inclusive. + * If outside the range 0 to 9, the zero digit is returned. + * @return the format symbol for the given digit. + * @internal This API is currently for ICU use only. + */ + inline const UnicodeString& getConstDigitSymbol(int32_t digit) const; + + /** * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API. * @internal */ @@ -444,6 +460,22 @@ private: */ UnicodeString fNoSymbol; + /** + * Dealing with code points is faster than dealing with strings when formatting. Because of + * this, we maintain a value containing the zero code point that is used whenever digitStrings + * represents a sequence of ten code points in order. + * + * <p>If the value stored here is positive, it means that the code point stored in this value + * corresponds to the digitStrings array, and codePointZero can be used instead of the + * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does + * *not* contain a sequence of code points, and it must be used directly. 
+ * + * <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero + * should never be set directly; rather, it should be updated only when digitStrings mutates. + * That is, the flow of information is digitStrings -> codePointZero, not the other way. + */ + UChar32 fCodePointZero; + Locale locale; char actualLocale[ULOC_FULLNAME_CAPACITY]; @@ -469,7 +501,7 @@ DecimalFormatSymbols::getSymbol(ENumberFormatSymbol symbol) const { return *strPtr; } -// See comments above for this function. Not hidden with #ifndef U_HIDE_INTERNAL_API +// See comments above for this function. Not hidden with #ifdef U_HIDE_INTERNAL_API inline const UnicodeString & DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const { const UnicodeString *strPtr; @@ -481,6 +513,19 @@ DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const { return *strPtr; } +#ifndef U_HIDE_INTERNAL_API +inline const UnicodeString& DecimalFormatSymbols::getConstDigitSymbol(int32_t digit) const { + if (digit < 0 || digit > 9) { + digit = 0; + } + if (digit == 0) { + return fSymbols[kZeroDigitSymbol]; + } + ENumberFormatSymbol key = static_cast<ENumberFormatSymbol>(kOneDigitSymbol + digit - 1); + return fSymbols[key]; +} +#endif + // ------------------------------------- inline void @@ -497,14 +542,20 @@ DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString // If the zero digit is being set to a known zero digit according to Unicode, // then we automatically set the corresponding 1-9 digits - if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) { + // Also record updates to fCodePointZero. Be conservative if in doubt. 
+ if (symbol == kZeroDigitSymbol) { UChar32 sym = value.char32At(0); - if ( u_charDigitValue(sym) == 0 ) { + if ( propogateDigits && u_charDigitValue(sym) == 0 && value.countChar32() == 1 ) { + fCodePointZero = sym; for ( int8_t i = 1 ; i<= 9 ; i++ ) { sym++; fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym); } + } else { + fCodePointZero = -1; } + } else if (symbol >= kOneDigitSymbol && symbol <= kNineDigitSymbol) { + fCodePointZero = -1; } } diff --git a/deps/icu-small/source/i18n/unicode/decimfmt.h b/deps/icu-small/source/i18n/unicode/decimfmt.h index 790053636d..b062208d9b 100644 --- a/deps/icu-small/source/i18n/unicode/decimfmt.h +++ b/deps/icu-small/source/i18n/unicode/decimfmt.h @@ -30,7 +30,7 @@ #include "unicode/utypes.h" /** * \file - * \brief C++ API: Formats decimal numbers. + * \brief C++ API: Compatibility APIs for decimal formatting. */ #if !UCONFIG_NO_FORMATTING @@ -67,13 +67,19 @@ class PluralRules; class VisibleDigitsWithExponent; // explicit template instantiation. see digitlst.h -#if defined (_MSC_VER) +// (When building DLLs for Windows this is required.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN template class U_I18N_API EnumSet<UNumberFormatAttribute, UNUM_MAX_NONBOOLEAN_ATTRIBUTE+1, UNUM_LIMIT_BOOLEAN_ATTRIBUTE>; #endif /** + * <p><strong>IMPORTANT:</strong> New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. + * <hr/> + * * DecimalFormat is a concrete subclass of NumberFormat that formats decimal * numbers. It has a variety of features designed to make it possible to parse * and format numbers in any locale, including support for Western, Arabic, or @@ -688,6 +694,9 @@ public: * on NumberFormat such as createInstance. These factories will * return the most appropriate sub-class of NumberFormat for a given * locale. 
+ * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * @param status Output param set to success/failure code. If the * pattern is invalid this will be set to a failure code. * @stable ICU 2.0 @@ -703,6 +712,9 @@ public: * on NumberFormat such as createInstance. These factories will * return the most appropriate sub-class of NumberFormat for a given * locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * @param pattern A non-localized pattern string. * @param status Output param set to success/failure code. If the * pattern is invalid this will be set to a failure code. @@ -721,6 +733,9 @@ public: * createInstance or createCurrencyInstance. If you need only minor adjustments * to a standard format, you can modify the format returned by * a NumberFormat factory method. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbolsToAdopt the set of symbols to be used. The caller should not @@ -826,6 +841,9 @@ public: * createInstance or createCurrencyInstance. If you need only minor adjustments * to a standard format, you can modify the format returned by * a NumberFormat factory method. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbolsToAdopt the set of symbols to be used. The caller should not @@ -849,6 +867,9 @@ public: * createInstance or createCurrencyInstance. If you need only minor adjustments * to a standard format, you can modify the format returned by * a NumberFormat factory method. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of DecimalFormat. 
* * @param pattern a non-localized pattern string * @param symbols the set of symbols to be used @@ -1964,12 +1985,14 @@ public: UCurrencyUsage getCurrencyUsage() const; +#ifndef U_HIDE_DEPRECATED_API /** * The resource tags we use to retrieve decimal format data from * locale resource bundles. * @deprecated ICU 3.4. This string has no public purpose. Please don't use it. */ static const char fgNumberPatterns[]; +#endif // U_HIDE_DEPRECATED_API #ifndef U_HIDE_INTERNAL_API /** diff --git a/deps/icu-small/source/i18n/unicode/dtptngen.h b/deps/icu-small/source/i18n/unicode/dtptngen.h index 5712edbb9f..feb465e799 100644 --- a/deps/icu-small/source/i18n/unicode/dtptngen.h +++ b/deps/icu-small/source/i18n/unicode/dtptngen.h @@ -263,14 +263,29 @@ public: /** * Getter corresponding to setAppendItemNames. Values below 0 or at or above - * UDATPG_FIELD_COUNT are illegal arguments. + * UDATPG_FIELD_COUNT are illegal arguments. Note: The more general method + * for getting date/time field display names is getFieldDisplayName. * * @param field such as UDATPG_ERA_FIELD. * @return name for field + * @see getFieldDisplayName * @stable ICU 3.8 */ const UnicodeString& getAppendItemName(UDateTimePatternField field) const; +#ifndef U_HIDE_DRAFT_API + /** + * The general interface to get a display name for a particular date/time field, + * in one of several possible display widths. + * + * @param field The desired UDateTimePatternField, such as UDATPG_ERA_FIELD. + * @param width The desired UDateTimePGDisplayWidth, such as UDATPG_ABBREVIATED. + * @return. The display name for field + * @draft ICU 61 + */ + UnicodeString getFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) const; +#endif // U_HIDE_DRAFT_API + /** * The DateTimeFormat is a message format pattern used to compose date and * time patterns. 
The default pattern in the root locale is "{1} {0}", where @@ -507,13 +522,17 @@ private: */ DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other); + // TODO(ticket:13619): re-enable when UDATPG_NARROW no longer in draft mode. + // static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; + Locale pLocale; // pattern locale FormatParser *fp; DateTimeMatcher* dtMatcher; DistanceInfo *distanceInfo; PatternMap *patternMap; UnicodeString appendItemFormats[UDATPG_FIELD_COUNT]; - UnicodeString appendItemNames[UDATPG_FIELD_COUNT]; + // TODO(ticket:13619): [3] -> UDATPG_WIDTH_COUNT + UnicodeString fieldDisplayNames[UDATPG_FIELD_COUNT][3]; UnicodeString dateTimeFormat; UnicodeString decimal; DateTimeMatcher *skipMatcher; @@ -543,8 +562,11 @@ private: void setDateTimeFromCalendar(const Locale& locale, UErrorCode& status); void setDecimalSymbols(const Locale& locale, UErrorCode& status); UDateTimePatternField getAppendFormatNumber(const char* field) const; - UDateTimePatternField getAppendNameNumber(const char* field) const; - UnicodeString& getMutableAppendItemName(UDateTimePatternField field); +#ifndef U_HIDE_DRAFT_API + UDateTimePatternField getFieldAndWidthIndices(const char* key, UDateTimePGDisplayWidth* widthP) const; + void setFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width, const UnicodeString& value); + UnicodeString& getMutableFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width); +#endif // U_HIDE_DRAFT_API void getAppendName(UDateTimePatternField field, UnicodeString& value); UnicodeString mapSkeletonMetacharacters(const UnicodeString& patternForm, int32_t* flags, UErrorCode& status); int32_t getCanonicalIndex(const UnicodeString& field); diff --git a/deps/icu-small/source/i18n/unicode/measfmt.h b/deps/icu-small/source/i18n/unicode/measfmt.h index 251fd213b5..14399dd59a 100644 --- a/deps/icu-small/source/i18n/unicode/measfmt.h +++ b/deps/icu-small/source/i18n/unicode/measfmt.h @@ -22,7 
+22,7 @@ /** * \file - * \brief C++ API: Formatter for measure objects. + * \brief C++ API: Compatibility APIs for measure formatting. */ /** @@ -87,8 +87,9 @@ class ListFormatter; class DateFormat; /** - * - * A formatter for measure objects. + * <p><strong>IMPORTANT:</strong> New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. * * @see Format * @author Alan Liu @@ -101,6 +102,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Constructor. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 53 */ MeasureFormat( @@ -108,6 +112,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Constructor. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 53 */ MeasureFormat( @@ -227,6 +234,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Return a formatter for CurrencyAmount objects in the given * locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param locale desired locale * @param ec input-output error code * @return a formatter object, or NULL upon error @@ -238,6 +248,9 @@ class U_I18N_API MeasureFormat : public Format { /** * Return a formatter for CurrencyAmount objects in the default * locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. 
* @param ec input-output error code * @return a formatter object, or NULL upon error * @stable ICU 3.0 diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index 08c8d6f588..f7ddb4e20c 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -196,8 +196,8 @@ class U_I18N_API MeasureUnit: public UObject { * ICU use only. * @internal */ - static MeasureUnit *resolveUnitPerUnit( - const MeasureUnit &unit, const MeasureUnit &perUnit); + static MeasureUnit resolveUnitPerUnit( + const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved); #endif /* U_HIDE_INTERNAL_API */ // All code between the "Start generated createXXX methods" comment and @@ -832,15 +832,13 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createPicometer(UErrorCode &status); -#ifndef U_HIDE_DRAFT_API /** * Returns unit of length: point. * Caller owns returned value and must free it. * @param status ICU error code. - * @draft ICU 59 + * @stable ICU 59 */ static MeasureUnit *createPoint(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ /** * Returns unit of length: yard. 
diff --git a/deps/icu-small/source/i18n/unicode/nounit.h b/deps/icu-small/source/i18n/unicode/nounit.h index 04fc84b33a..290e77e880 100644 --- a/deps/icu-small/source/i18n/unicode/nounit.h +++ b/deps/icu-small/source/i18n/unicode/nounit.h @@ -10,17 +10,17 @@ #ifndef __NOUNIT_H__ #define __NOUNIT_H__ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measunit.h" /** * \file * \brief C++ API: units for percent and permille */ - -#include "unicode/measunit.h" - -#if !UCONFIG_NO_FORMATTING - U_NAMESPACE_BEGIN #ifndef U_HIDE_DRAFT_API diff --git a/deps/icu-small/source/i18n/unicode/numberformatter.h b/deps/icu-small/source/i18n/unicode/numberformatter.h index 4a11c2f915..3fbb33ccee 100644 --- a/deps/icu-small/source/i18n/unicode/numberformatter.h +++ b/deps/icu-small/source/i18n/unicode/numberformatter.h @@ -88,10 +88,6 @@ * </ul> * * <p> - * * The narrow format for currencies is not currently supported; this is a known issue that will be fixed in a - * future version. See #11666 for more information. - * - * <p> * This enum is similar to {@link com.ibm.icu.text.MeasureFormat.FormatWidth}. * * @draft ICU 60 @@ -155,27 +151,122 @@ typedef enum UNumberUnitWidth { * * @draft ICU 60 */ - UNUM_UNIT_WIDTH_HIDDEN, + UNUM_UNIT_WIDTH_HIDDEN +#ifndef U_HIDE_INTERNAL_API + , /** * One more than the highest UNumberUnitWidth value. * * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ UNUM_UNIT_WIDTH_COUNT +#endif // U_HIDE_INTERNAL_API } UNumberUnitWidth; /** - * An enum declaring how to denote positive and negative numbers. Example outputs when formatting 123 and -123 in - * <em>en-US</em>: + * An enum declaring the strategy for when and how to display grouping separators (i.e., the + * separator, often a comma or period, after every 2-3 powers of ten). The choices are several + * pre-built strategies for different use cases that employ locale data whenever possible. 
Example + * outputs for 1234 and 1234567 in <em>en-IN</em>: + * + * <ul> + * <li>OFF: 1234 and 12345 + * <li>MIN2: 1234 and 12,34,567 + * <li>AUTO: 1,234 and 12,34,567 + * <li>ON_ALIGNED: 1,234 and 12,34,567 + * <li>THOUSANDS: 1,234 and 1,234,567 + * </ul> * * <p> + * The default is AUTO, which displays grouping separators unless the locale data says that grouping + * is not customary. To force grouping for all numbers greater than 1000 consistently across locales, + * use ON_ALIGNED. On the other hand, to display grouping less frequently than the default, use MIN2 + * or OFF. See the docs of each option for details. + * + * <p> + * Note: This enum specifies the strategy for grouping sizes. To set which character to use as the + * grouping separator, use the "symbols" setter. + * + * @draft ICU 61 + */ +typedef enum UGroupingStrategy { + /** + * Do not display grouping separators in any locale. + * + * @draft ICU 61 + */ + UNUM_GROUPING_OFF, + + /** + * Display grouping using locale defaults, except do not show grouping on values smaller than + * 10000 (such that there is a <em>minimum of two digits</em> before the first separator). + * + * <p> + * Note that locales may restrict grouping separators to be displayed only on 1 million or + * greater (for example, ee and hu) or disable grouping altogether (for example, bg currency). + * + * <p> + * Locale data is used to determine whether to separate larger numbers into groups of 2 + * (customary in South Asia) or groups of 3 (customary in Europe and the Americas). + * + * @draft ICU 61 + */ + UNUM_GROUPING_MIN2, + + /** + * Display grouping using the default strategy for all locales. This is the default behavior. + * + * <p> + * Note that locales may restrict grouping separators to be displayed only on 1 million or + * greater (for example, ee and hu) or disable grouping altogether (for example, bg currency). 
+ * + * <p> + * Locale data is used to determine whether to separate larger numbers into groups of 2 + * (customary in South Asia) or groups of 3 (customary in Europe and the Americas). + * + * @draft ICU 61 + */ + UNUM_GROUPING_AUTO, + + /** + * Always display the grouping separator on values of at least 1000. + * + * <p> + * This option ignores the locale data that restricts or disables grouping, described in MIN2 and + * AUTO. This option may be useful to normalize the alignment of numbers, such as in a + * spreadsheet. + * + * <p> + * Locale data is used to determine whether to separate larger numbers into groups of 2 + * (customary in South Asia) or groups of 3 (customary in Europe and the Americas). + * + * @draft ICU 61 + */ + UNUM_GROUPING_ON_ALIGNED, + + /** + * Use the Western defaults: groups of 3 and enabled for all numbers 1000 or greater. Do not use + * locale data for determining the grouping strategy. + * + * @draft ICU 61 + */ + UNUM_GROUPING_THOUSANDS + +} UGroupingStrategy; + +/** + * An enum declaring how to denote positive and negative numbers. Example outputs when formatting + * 123, 0, and -123 in <em>en-US</em>: + * * <ul> - * <li>AUTO: "123", "-123" - * <li>ALWAYS: "+123", "-123" - * <li>NEVER: "123", "123" - * <li>ACCOUNTING: "$123", "($123)" - * <li>ACCOUNTING_ALWAYS: "+$123", "($123)" + * <li>AUTO: "123", "0", and "-123" + * <li>ALWAYS: "+123", "+0", and "-123" + * <li>NEVER: "123", "0", and "123" + * <li>ACCOUNTING: "$123", "$0", and "($123)" + * <li>ACCOUNTING_ALWAYS: "+$123", "+$0", and "($123)" + * <li>EXCEPT_ZERO: "+123", "0", and "-123" + * <li>ACCOUNTING_EXCEPT_ZERO: "+$123", "$0", and "($123)" * </ul> * * <p> @@ -190,21 +281,22 @@ typedef enum UNumberSignDisplay { * * @draft ICU 60 */ - UNUM_SIGN_AUTO, + UNUM_SIGN_AUTO, /** - * Show the minus sign on negative numbers and the plus sign on positive numbers. + * Show the minus sign on negative numbers and the plus sign on positive numbers, including zero. 
+ * To hide the sign on zero, see {@link UNUM_SIGN_EXCEPT_ZERO}. * * @draft ICU 60 */ - UNUM_SIGN_ALWAYS, + UNUM_SIGN_ALWAYS, /** * Do not show the sign on positive or negative numbers. * * @draft ICU 60 */ - UNUM_SIGN_NEVER, + UNUM_SIGN_NEVER, /** * Use the locale-dependent accounting format on negative numbers, and do not show the sign on positive numbers. @@ -220,22 +312,44 @@ typedef enum UNumberSignDisplay { * * @draft ICU 60 */ - UNUM_SIGN_ACCOUNTING, + UNUM_SIGN_ACCOUNTING, /** - * Use the locale-dependent accounting format on negative numbers, and show the plus sign on positive numbers. - * For more information on the accounting format, see the ACCOUNTING sign display strategy. + * Use the locale-dependent accounting format on negative numbers, and show the plus sign on + * positive numbers, including zero. For more information on the accounting format, see the + * ACCOUNTING sign display strategy. To hide the sign on zero, see + * {@link UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO}. * * @draft ICU 60 */ - UNUM_SIGN_ACCOUNTING_ALWAYS, + UNUM_SIGN_ACCOUNTING_ALWAYS, + + /** + * Show the minus sign on negative numbers and the plus sign on positive numbers. Do not show a + * sign on zero. + * + * @draft ICU 61 + */ + UNUM_SIGN_EXCEPT_ZERO, + + /** + * Use the locale-dependent accounting format on negative numbers, and show the plus sign on + * positive numbers. Do not show a sign on zero. For more information on the accounting format, + * see the ACCOUNTING sign display strategy. + * + * @draft ICU 61 + */ + UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO +#ifndef U_HIDE_INTERNAL_API + , /** * One more than the highest UNumberSignDisplay value. * * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. 
*/ - UNUM_SIGN_COUNT + UNUM_SIGN_COUNT +#endif // U_HIDE_INTERNAL_API } UNumberSignDisplay; /** @@ -261,14 +375,17 @@ typedef enum UNumberDecimalSeparatorDisplay { * * @draft ICU 60 */ - UNUM_DECIMAL_SEPARATOR_ALWAYS, + UNUM_DECIMAL_SEPARATOR_ALWAYS +#ifndef U_HIDE_INTERNAL_API + , /** * One more than the highest UNumberDecimalSeparatorDisplay value. * * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ UNUM_DECIMAL_SEPARATOR_COUNT +#endif // U_HIDE_INTERNAL_API } UNumberDecimalMarkDisplay; U_NAMESPACE_BEGIN namespace number { // icu::number @@ -283,11 +400,27 @@ class Rounder; class FractionRounder; class CurrencyRounder; class IncrementRounder; -class Grouper; class IntegerWidth; namespace impl { +#ifndef U_HIDE_INTERNAL_API +/** + * Datatype for minimum/maximum fraction digits. Must be able to hold kMaxIntFracSig. + * + * @internal + */ +typedef int16_t digits_t; + +/** + * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built + * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path. 
+ * + * @internal + */ +static constexpr int32_t DEFAULT_THRESHOLD = 3; +#endif // U_HIDE_INTERNAL_API + // Forward declarations: class Padder; struct MacroProps; @@ -471,7 +604,7 @@ class U_I18N_API Notation : public UMemory { struct ScientificSettings { int8_t fEngineeringInterval; bool fRequireMinInt; - int8_t fMinExponentDigits; + impl::digits_t fMinExponentDigits; UNumberSignDisplay fExponentSignDisplay; } scientific; @@ -786,14 +919,14 @@ class U_I18N_API Rounder : public UMemory { union RounderUnion { struct FractionSignificantSettings { // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT - int8_t fMinFrac; - int8_t fMaxFrac; - int8_t fMinSig; - int8_t fMaxSig; + impl::digits_t fMinFrac; + impl::digits_t fMaxFrac; + impl::digits_t fMinSig; + impl::digits_t fMaxSig; } fracSig; struct IncrementSettings { double fIncrement; - int32_t fMinFrac; + impl::digits_t fMinFrac; } increment; // For RND_INCREMENT UCurrencyUsage currencyUsage; // For RND_CURRENCY UErrorCode errorCode; // For RND_ERROR @@ -836,6 +969,20 @@ class U_I18N_API Rounder : public UMemory { /** Version of {@link #apply} that obeys minInt constraints. Used for scientific notation compatibility mode. */ void apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode status); + /** + * Rounding endpoint used by Engineering and Compact notation. Chooses the most appropriate multiplier (magnitude + * adjustment), applies the adjustment, rounds, and returns the chosen multiplier. + * + * <p> + * In most cases, this is simple. However, when rounding the number causes it to cross a multiplier boundary, we + * need to re-do the rounding. For example, to display 999,999 in Engineering notation with 2 sigfigs, first you + * guess the multiplier to be -3. However, then you end up getting 1000E3, which is not the correct output. You then + * change your multiplier to be -6, and you get 1.0E6, which is correct. + * + * @param input The quantity to process. 
+ * @param producer Function to call to return a multiplier based on a magnitude. + * @return The number of orders of magnitude the input was adjusted by this method. + */ int32_t chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl::MultiplierProducer &producer, UErrorCode &status); @@ -1003,53 +1150,6 @@ class U_I18N_API IncrementRounder : public Rounder { }; /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ -class U_I18N_API Grouper : public UMemory { - public: - /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ - static Grouper defaults(); - - /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ - static Grouper minTwoDigits(); - - /** - * @internal This API is a technical preview. It is likely to change in an upcoming release. - */ - static Grouper none(); - - private: - int8_t fGrouping1; // -3 means "bogus"; -2 means "needs locale data"; -1 means "no grouping" - int8_t fGrouping2; - bool fMin2; - - Grouper(int8_t grouping1, int8_t grouping2, bool min2) - : fGrouping1(grouping1), fGrouping2(grouping2), fMin2(min2) {} - - Grouper() : fGrouping1(-3) {}; - - bool isBogus() const { - return fGrouping1 == -3; - } - - /** NON-CONST: mutates the current instance. */ - void setLocaleData(const impl::ParsedPatternInfo &patternInfo); - - bool groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const; - - // To allow MacroProps/MicroProps to initialize empty instances: - friend struct impl::MacroProps; - friend struct impl::MicroProps; - - // To allow NumberFormatterImpl to access isBogus() and perform other operations: - friend class impl::NumberFormatterImpl; -}; - -/** * A class that defines the strategy for padding and truncating integers before the decimal separator. 
* * <p> @@ -1080,7 +1180,8 @@ class U_I18N_API IntegerWidth : public UMemory { * For example, with maxInt=3, the number 1234 will get printed as "234". * * @param maxInt - * The maximum number of places before the decimal separator. + * The maximum number of places before the decimal separator. maxInt == -1 means no + * truncation. * @return An IntegerWidth for passing to the NumberFormatter integerWidth() setter. * @draft ICU 60 * @see NumberFormatter @@ -1090,14 +1191,14 @@ class U_I18N_API IntegerWidth : public UMemory { private: union { struct { - int8_t fMinInt; - int8_t fMaxInt; + impl::digits_t fMinInt; + impl::digits_t fMaxInt; } minMaxInt; UErrorCode errorCode; } fUnion; bool fHasError = false; - IntegerWidth(int8_t minInt, int8_t maxInt); + IntegerWidth(impl::digits_t minInt, impl::digits_t maxInt); IntegerWidth(UErrorCode errorCode) { // NOLINT fUnion.errorCode = errorCode; @@ -1132,14 +1233,7 @@ class U_I18N_API IntegerWidth : public UMemory { namespace impl { -/** - * Use a default threshold of 3. This means that the third time .format() is called, the data structures get built - * using the "safe" code path. The first two calls to .format() will trigger the unsafe code path. - * - * @internal - */ -static constexpr int32_t DEFAULT_THRESHOLD = 3; - +// Do not enclose entire SymbolsWrapper with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** @internal */ class U_I18N_API SymbolsWrapper : public UMemory { public: @@ -1155,6 +1249,7 @@ class U_I18N_API SymbolsWrapper : public UMemory { /** @internal */ SymbolsWrapper &operator=(const SymbolsWrapper &other); +#ifndef U_HIDE_INTERNAL_API /** * The provided object is copied, but we do not adopt it. 
* @internal @@ -1202,6 +1297,7 @@ class U_I18N_API SymbolsWrapper : public UMemory { } return FALSE; } +#endif // U_HIDE_INTERNAL_API private: enum SymbolsPointerType { @@ -1218,14 +1314,72 @@ class U_I18N_API SymbolsWrapper : public UMemory { void doCleanup(); }; +// Do not enclose entire Grouper with #ifndef U_HIDE_INTERNAL_API, needed for a protected field +/** @internal */ +class U_I18N_API Grouper : public UMemory { + public: +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static Grouper forStrategy(UGroupingStrategy grouping); + + // Future: static Grouper forProperties(DecimalFormatProperties& properties); + + /** @internal */ + Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping) + : fGrouping1(grouping1), fGrouping2(grouping2), fMinGrouping(minGrouping) {} +#endif // U_HIDE_INTERNAL_API + + private: + /** + * The grouping sizes, with the following special values: + * <ul> + * <li>-1 = no grouping + * <li>-2 = needs locale data + * <li>-4 = fall back to Western grouping if not in locale + * </ul> + */ + int16_t fGrouping1; + int16_t fGrouping2; + + /** + * The minimum grouping size, with the following special values: + * <ul> + * <li>-2 = needs locale data + * <li>-3 = no less than 2 + * </ul> + */ + int16_t fMinGrouping; + + Grouper() : fGrouping1(-3) {}; + + bool isBogus() const { + return fGrouping1 == -3; + } + + /** NON-CONST: mutates the current instance. 
*/ + void setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Locale& locale); + + bool groupAtPosition(int32_t position, const impl::DecimalQuantity &value) const; + + // To allow MacroProps/MicroProps to initialize empty instances: + friend struct MacroProps; + friend struct MicroProps; + + // To allow NumberFormatterImpl to access isBogus() and perform other operations: + friend class NumberFormatterImpl; +}; + +// Do not enclose entire Padder with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** @internal */ class U_I18N_API Padder : public UMemory { public: +#ifndef U_HIDE_INTERNAL_API /** @internal */ static Padder none(); /** @internal */ static Padder codePoints(UChar32 cp, int32_t targetWidth, UNumberFormatPadPosition position); +#endif // U_HIDE_INTERNAL_API private: UChar32 fWidth; // -3 = error; -2 = bogus; -1 = no padding @@ -1275,6 +1429,7 @@ class U_I18N_API Padder : public UMemory { friend class impl::NumberFormatterImpl; }; +// Do not enclose entire MacroProps with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** @internal */ struct U_I18N_API MacroProps : public UMemory { /** @internal */ @@ -1284,6 +1439,9 @@ struct U_I18N_API MacroProps : public UMemory { MeasureUnit unit; // = NoUnit::base(); /** @internal */ + MeasureUnit perUnit; // = NoUnit::base(); + + /** @internal */ Rounder rounder; // = Rounder(); (bogus) /** @internal */ @@ -1375,29 +1533,30 @@ class U_I18N_API NumberFormatterSettings { * <li>Percent: "12.3%" * </ul> * - * <p> * All units will be properly localized with locale data, and all units are compatible with notation styles, * rounding strategies, and other number formatter settings. * - * <p> - * Pass this method any instance of {@link MeasureUnit}. For units of measure: + * Pass this method any instance of {@link MeasureUnit}. 
For units of measure (which often involve the + * factory methods that return a pointer): * * <pre> - * NumberFormatter.with().adoptUnit(MeasureUnit::createMeter(status)) + * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status)) * </pre> * * Currency: * * <pre> - * NumberFormatter.with()::unit(CurrencyUnit(u"USD", status)) + * NumberFormatter::with().unit(CurrencyUnit(u"USD", status)) * </pre> * * Percent: * * <pre> - * NumberFormatter.with()::unit(NoUnit.percent()) + * NumberFormatter::with().unit(NoUnit.percent()) * </pre> * + * See {@link #perUnit} for information on how to format strings like "5 meters per second". + * * The default is to render without units (equivalent to NoUnit.base()). * * @param unit @@ -1406,22 +1565,65 @@ class U_I18N_API NumberFormatterSettings { * @see MeasureUnit * @see Currency * @see NoUnit + * @see #perUnit * @draft ICU 60 */ Derived unit(const icu::MeasureUnit &unit) const; /** * Like unit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory - * methods, which return pointers that need ownership. + * methods, which return pointers that need ownership. Example: + * + * <pre> + * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status)) + * </pre> * * @param unit - * The unit to render. + * The unit to render. * @return The fluent chain. * @see #unit * @see MeasureUnit * @draft ICU 60 */ - Derived adoptUnit(const icu::MeasureUnit *unit) const; + Derived adoptUnit(icu::MeasureUnit *unit) const; + + /** + * Sets a unit to be used in the denominator. For example, to format "3 m/s", pass METER to the unit and SECOND to + * the perUnit. + * + * Pass this method any instance of {@link MeasureUnit}. Since MeasureUnit factory methods return pointers, the + * {@link #adoptPerUnit} version of this method is often more useful. + * + * The default is not to display any unit in the denominator. 
+ * + * If a per-unit is specified without a primary unit via {@link #unit}, the behavior is undefined. + * + * @param perUnit + * The unit to render in the denominator. + * @return The fluent chain + * @see #unit + * @draft ICU 61 + */ + Derived perUnit(const icu::MeasureUnit &perUnit) const; + + /** + * Like perUnit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory + * methods, which return pointers that need ownership. Example: + * + * <pre> + * NumberFormatter::with() + * .adoptUnit(MeasureUnit::createMeter(status)) + * .adoptPerUnit(MeasureUnit::createSecond(status)) + * </pre> + * + * @param perUnit + * The unit to render in the denominator. + * @return The fluent chain. + * @see #perUnit + * @see MeasureUnit + * @draft ICU 61 + */ + Derived adoptPerUnit(icu::MeasureUnit *perUnit) const; /** * Specifies the rounding strategy to use when formatting numbers. @@ -1456,8 +1658,6 @@ class U_I18N_API NumberFormatterSettings { */ Derived rounding(const Rounder &rounder) const; -#ifndef U_HIDE_INTERNAL_API - /** * Specifies the grouping strategy to use when formatting numbers. * @@ -1471,25 +1671,21 @@ class U_I18N_API NumberFormatterSettings { * The exact grouping widths will be chosen based on the locale. * * <p> - * Pass this method the return value of one of the factory methods on {@link Grouper}. For example: + * Pass this method an element from the {@link UGroupingStrategy} enum. For example: * * <pre> - * NumberFormatter::with().grouping(Grouper::min2()) + * NumberFormatter::with().grouping(UNUM_GROUPING_MIN2) * </pre> * - * The default is to perform grouping without concern for the minimum grouping digits. + * The default is to perform grouping according to locale data; most locales, but not all locales, + * enable it by default. * - * @param grouper + * @param strategy * The grouping strategy to use. * @return The fluent chain. 
- * @see Grouper - * @see Notation - * @internal - * @internal ICU 60: This API is technical preview. + * @draft ICU 61 */ - Derived grouping(const Grouper &grouper) const; - -#endif /* U_HIDE_INTERNAL_API */ + Derived grouping(const UGroupingStrategy &strategy) const; /** * Specifies the minimum and maximum number of digits to render before the decimal mark. @@ -1592,7 +1788,7 @@ class U_I18N_API NumberFormatterSettings { * @see NumberingSystem * @draft ICU 60 */ - Derived adoptSymbols(const NumberingSystem *symbols) const; + Derived adoptSymbols(NumberingSystem *symbols) const; /** * Sets the width of the unit (measure unit or currency). Most common values: diff --git a/deps/icu-small/source/i18n/unicode/numfmt.h b/deps/icu-small/source/i18n/unicode/numfmt.h index d6b2a6d53c..1332f52566 100644 --- a/deps/icu-small/source/i18n/unicode/numfmt.h +++ b/deps/icu-small/source/i18n/unicode/numfmt.h @@ -28,7 +28,7 @@ /** * \file - * \brief C++ API: Abstract base class for all number formats. + * \brief C++ API: Compatibility APIs for number formatting. */ #if !UCONFIG_NO_FORMATTING @@ -53,16 +53,16 @@ class StringEnumeration; #endif /** + * <p><strong>IMPORTANT:</strong> New users are strongly encouraged to see if + * numberformatter.h fits their use case. Although not deprecated, this header + * is provided for backwards compatibility only. + * <hr/> * * Abstract base class for all number formats. Provides interface for * formatting and parsing a number. Also provides methods for * determining which locales have number formats, and what their names * are. * - * <p><strong>NOTE:</strong> Starting in ICU 60, there is a new set of APIs for localized number - * formatting that are designed to be an improvement over DecimalFormat. New users are discouraged - * from using DecimalFormat. For more information, see numberformatter.h. - * * \headerfile unicode/numfmt.h "unicode/numfmt.h" * <P> * NumberFormat helps you to format and parse numbers for any locale. 
@@ -708,6 +708,9 @@ public: /** * Create a default style NumberFormat for the current default locale. * The default formatting style is locale dependent. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createInstance(UErrorCode&); @@ -716,6 +719,9 @@ public: * Create a default style NumberFormat for the specified locale. * The default formatting style is locale dependent. * @param inLocale the given locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createInstance(const Locale& inLocale, @@ -723,6 +729,9 @@ public: /** * Create a specific style NumberFormat for the specified locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param desiredLocale the given locale. * @param style the given style. * @param errorCode Output param filled with success/failure status. @@ -759,12 +768,18 @@ public: /** * Returns a currency format for the current default locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createCurrencyInstance(UErrorCode&); /** * Returns a currency format for the specified locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -773,12 +788,18 @@ public: /** * Returns a percentage format for the current default locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. 
* @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createPercentInstance(UErrorCode&); /** * Returns a percentage format for the specified locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -787,12 +808,18 @@ public: /** * Returns a scientific format for the current default locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createScientificInstance(UErrorCode&); /** * Returns a scientific format for the specified locale. + * <p> + * <strong>NOTE:</strong> New users are strongly encouraged to use + * {@link NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/i18n/unicode/simpletz.h b/deps/icu-small/source/i18n/unicode/simpletz.h index 1b23ab79d1..7f5f1664cc 100644 --- a/deps/icu-small/source/i18n/unicode/simpletz.h +++ b/deps/icu-small/source/i18n/unicode/simpletz.h @@ -647,7 +647,8 @@ public: * Sets the amount of time in ms that the clock is advanced during DST. * @param millisSavedDuringDST the number of milliseconds the time is * advanced with respect to standard time when the daylight savings rules - * are in effect. A positive number, typically one hour (3600000). + * are in effect. Typically one hour (+3600000). The amount could be negative, + * but not 0. * @param status An UErrorCode to receive the status. * @stable ICU 2.0 */ @@ -657,7 +658,8 @@ public: * Returns the amount of time in ms that the clock is advanced during DST. * @return the number of milliseconds the time is * advanced with respect to standard time when the daylight savings rules - * are in effect. A positive number, typically one hour (3600000). + * are in effect. Typically one hour (+3600000). 
The amount could be negative, + * but not 0. * @stable ICU 2.0 */ virtual int32_t getDSTSavings(void) const; diff --git a/deps/icu-small/source/i18n/unicode/udatpg.h b/deps/icu-small/source/i18n/unicode/udatpg.h index 9e3bdd4114..54f1254346 100644 --- a/deps/icu-small/source/i18n/unicode/udatpg.h +++ b/deps/icu-small/source/i18n/unicode/udatpg.h @@ -95,6 +95,21 @@ typedef enum UDateTimePatternField { UDATPG_FIELD_COUNT } UDateTimePatternField; +#ifndef U_HIDE_DRAFT_API +/** + * Field display name width constants for udatpg_getFieldDisplayName(). + * @draft ICU 61 + */ +typedef enum UDateTimePGDisplayWidth { + /** @draft ICU 61 */ + UDATPG_WIDE, + /** @draft ICU 61 */ + UDATPG_ABBREVIATED, + /** @draft ICU 61 */ + UDATPG_NARROW +} UDateTimePGDisplayWidth; +#endif // U_HIDE_DRAFT_API + /** * Masks to control forcing the length of specified fields in the returned * pattern to match those in the skeleton (when this would not happen @@ -410,12 +425,14 @@ udatpg_setAppendItemName(UDateTimePatternGenerator *dtpg, /** * Getter corresponding to setAppendItemNames. Values below 0 or at or above - * UDATPG_FIELD_COUNT are illegal arguments. + * UDATPG_FIELD_COUNT are illegal arguments. Note: The more general function + * for getting date/time field display names is udatpg_getFieldDisplayName. * * @param dtpg a pointer to UDateTimePatternGenerator. * @param field UDateTimePatternField, such as UDATPG_ERA_FIELD * @param pLength A pointer that will receive the length of the name for field. * @return name for field + * @see udatpg_getFieldDisplayName * @stable ICU 3.8 */ U_STABLE const UChar * U_EXPORT2 @@ -423,6 +440,40 @@ udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg, UDateTimePatternField field, int32_t *pLength); +#ifndef U_HIDE_DRAFT_API +/** + * The general interface to get a display name for a particular date/time field, + * in one of several possible display widths. 
+ * + * @param dtpg + * A pointer to the UDateTimePatternGenerator object with the localized + * display names. + * @param field + * The desired UDateTimePatternField, such as UDATPG_ERA_FIELD. + * @param width + * The desired UDateTimePGDisplayWidth, such as UDATPG_ABBREVIATED. + * @param fieldName + * A pointer to a buffer to receive the NULL-terminated display name. If the name + * fits into fieldName but cannot be NULL-terminated (length == capacity) then + * the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the name doesn't + * fit into fieldName then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * @param capacity + * The size of fieldName (in UChars). + * @param pErrorCode + * A pointer to a UErrorCode to receive any errors + * @return + * The full length of the name; if greater than capacity, fieldName contains a + * truncated result. + * @draft ICU 61 + */ +U_DRAFT int32_t U_EXPORT2 +udatpg_getFieldDisplayName(const UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + UDateTimePGDisplayWidth width, + UChar *fieldName, int32_t capacity, + UErrorCode *pErrorCode); +#endif // U_HIDE_DRAFT_API + /** * The DateTimeFormat is a message format pattern used to compose date and * time patterns. The default pattern in the root locale is "{1} {0}", where diff --git a/deps/icu-small/source/i18n/unicode/unum.h b/deps/icu-small/source/i18n/unicode/unum.h index 9154bce661..0e7b9fffba 100644 --- a/deps/icu-small/source/i18n/unicode/unum.h +++ b/deps/icu-small/source/i18n/unicode/unum.h @@ -33,6 +33,9 @@ * * <h2> Number Format C API </h2> * + * <p><strong>IMPORTANT:</strong> New users with C++ capabilities are + * strongly encouraged to see if numberformatter.h fits their use case. + * * Number Format C API Provides functions for * formatting and parsing a number. 
Also provides methods for * determining which locales have number formats, and what their names @@ -559,7 +562,6 @@ unum_formatDouble( const UNumberFormat* fmt, UFieldPosition *pos, /* 0 if ignore */ UErrorCode* status); -#ifndef U_HIDE_DRAFT_API /** * Format a double using a UNumberFormat according to the UNumberFormat's locale, * and initialize a UFieldPositionIterator that enumerates the subcomponents of @@ -600,9 +602,9 @@ unum_formatDouble( const UNumberFormat* fmt, * @see unum_parseDouble * @see UFieldPositionIterator * @see UNumberFormatFields -* @draft ICU 59 +* @stable ICU 59 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 unum_formatDoubleForFields(const UNumberFormat* format, double number, UChar* result, @@ -610,7 +612,6 @@ unum_formatDoubleForFields(const UNumberFormat* format, UFieldPositionIterator* fpositer, UErrorCode* status); -#endif /* U_HIDE_DRAFT_API */ /** * Format a decimal number using a UNumberFormat. diff --git a/deps/icu-small/source/i18n/unicode/upluralrules.h b/deps/icu-small/source/i18n/unicode/upluralrules.h index 99d93a4e05..690846bc89 100644 --- a/deps/icu-small/source/i18n/unicode/upluralrules.h +++ b/deps/icu-small/source/i18n/unicode/upluralrules.h @@ -175,7 +175,6 @@ uplrules_selectWithFormat(const UPluralRules *uplrules, #endif /* U_HIDE_INTERNAL_API */ -#ifndef U_HIDE_DRAFT_API /** * Creates a string enumeration of all plural rule keywords used in this * UPluralRules object. The rule "other" is always present by default. @@ -184,12 +183,11 @@ uplrules_selectWithFormat(const UPluralRules *uplrules, * @param status A pointer to a UErrorCode to receive any errors. * @return a string enumeration over plural rule keywords, or NULL * upon error. The caller is responsible for closing the result. 
- * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT UEnumeration* U_EXPORT2 +U_STABLE UEnumeration* U_EXPORT2 uplrules_getKeywords(const UPluralRules *uplrules, UErrorCode *status); -#endif /* U_HIDE_DRAFT_API */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/unicode/utrans.h b/deps/icu-small/source/i18n/unicode/utrans.h index a4158726ca..7672b4428f 100644 --- a/deps/icu-small/source/i18n/unicode/utrans.h +++ b/deps/icu-small/source/i18n/unicode/utrans.h @@ -382,7 +382,7 @@ utrans_openIDs(UErrorCode *pErrorCode); U_STABLE void U_EXPORT2 utrans_trans(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, int32_t start, int32_t* limit, UErrorCode* status); @@ -433,7 +433,7 @@ utrans_trans(const UTransliterator* trans, U_STABLE void U_EXPORT2 utrans_transIncremental(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, UTransPosition* pos, UErrorCode* status); diff --git a/deps/icu-small/source/i18n/uregex.cpp b/deps/icu-small/source/i18n/uregex.cpp index a5dee6241d..3703843634 100644 --- a/deps/icu-small/source/i18n/uregex.cpp +++ b/deps/icu-small/source/i18n/uregex.cpp @@ -1465,8 +1465,10 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, int32_t groupNum = 0; U_ASSERT(c == DOLLARSIGN); - UChar32 c32; - U16_GET(replacementText, 0, replIdx, replacementLength, c32); + UChar32 c32 = -1; + if (replIdx < replacementLength) { + U16_GET(replacementText, 0, replIdx, replacementLength, c32); + } if (u_isdigit(c32)) { int32_t numDigits = 0; int32_t numCaptureGroups = m->fPattern->fGroupMap->size(); diff --git a/deps/icu-small/source/i18n/utrans.cpp b/deps/icu-small/source/i18n/utrans.cpp index 62fd630d9e..31070dd43f 100644 --- a/deps/icu-small/source/i18n/utrans.cpp +++ b/deps/icu-small/source/i18n/utrans.cpp @@ -41,12 +41,12 @@ U_NAMESPACE_BEGIN class ReplaceableGlue : public Replaceable { 
UReplaceable *rep; - UReplaceableCallbacks *func; + const UReplaceableCallbacks *func; public: ReplaceableGlue(UReplaceable *replaceable, - UReplaceableCallbacks *funcCallback); + const UReplaceableCallbacks *funcCallback); virtual ~ReplaceableGlue(); @@ -88,7 +88,7 @@ protected: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, - UReplaceableCallbacks *funcCallback) + const UReplaceableCallbacks *funcCallback) : Replaceable() { this->rep = replaceable; @@ -398,7 +398,7 @@ utrans_openIDs(UErrorCode *pErrorCode) { U_CAPI void U_EXPORT2 utrans_trans(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, int32_t start, int32_t* limit, UErrorCode* status) { @@ -418,7 +418,7 @@ utrans_trans(const UTransliterator* trans, U_CAPI void U_EXPORT2 utrans_transIncremental(const UTransliterator* trans, UReplaceable* rep, - UReplaceableCallbacks* repFunc, + const UReplaceableCallbacks* repFunc, UTransPosition* pos, UErrorCode* status) { diff --git a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp index 13bfbd3789..f51a86ea96 100644 --- a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp +++ b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp @@ -4,39 +4,71 @@ #include <stdio.h> #include <string> #include <stdlib.h> -#include <unistd.h> #include <errno.h> #include <string.h> #include <iostream> #include <fstream> -// with caution: +// We only use U8_* macros, which are entirely inline. #include "unicode/utf8.h" +// This contains a codepage and ISO 14882:1998 illegality table. +// Use "make gen-table" to rebuild it. +#include "cptbl.h" + +/** + * What is this? + * + * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code + * in utf-8 into something consumable by certain compilers (Solaris, xlC) + * which aren't quite standards compliant. 
+ * + * - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN' + * - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc. + * (some compilers do not support the u8 prefix correctly.) + * - if the system is EBCDIC-based, that is used to correct the input characters. + * + * Usage: + * escapesrc infile.cpp outfile.cpp + * Normally this is invoked by the build stage, with a rule such as: + * + * _%.cpp: $(srcdir)/%.cpp + * @$(BINDIR)/escapesrc$(EXEEXT) $< $@ + * %.o: _%.cpp + * $(COMPILE.cc) ... $@ $< + * + * In the Makefiles, SKIP_ESCAPING=YES is used to prevent escapesrc.cpp + * from being itself escaped. + */ + + static const char kSPACE = 0x20, kTAB = 0x09, kLF = 0x0A, kCR = 0x0D; - // kHASH = 0x23, - // kSLASH = 0x2f, - // kSTAR = 0x2A, - -# include "cptbl.h" +// For convenience # define cp1047_to_8859(c) cp1047_8859_1[c] +// Our app's name std::string prog; +/** + * Give the usual 1-line documentation and exit + */ void usage() { fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str()); } - +/** + * Delete the output file (if any) + * We want to delete even if we didn't generate, because it might be stale. + */ int cleanup(const std::string &outfile) { const char *outstr = outfile.c_str(); if(outstr && *outstr) { - int rc = unlink(outstr); + int rc = std::remove(outstr); if(rc == 0) { fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr); return 0; @@ -44,7 +76,7 @@ int cleanup(const std::string &outfile) { if( errno == ENOENT ) { return 0; // File did not exist - no error. } else { - perror("unlink"); + perror("std::remove"); return 1; } } @@ -52,16 +84,12 @@ int cleanup(const std::string &outfile) { return 0; } -// inline bool hasNonAscii(const char *line, size_t len) { -// const unsigned char *uline = reinterpret_cast<const unsigned char*>(line); -// for(size_t i=0;i<len; i++) { -// if( uline[i] > 0x7F) { -// return true; -// } -// } -// return false; -// } - +/** + * Skip across any known whitespace. 
+ * @param p startpoint + * @param e limit + * @return first non-whitespace char + */ inline const char *skipws(const char *p, const char *e) { for(;p<e;p++) { switch(*p) { @@ -77,30 +105,11 @@ inline const char *skipws(const char *p, const char *e) { return p; } -// inline bool isCommentOrEmpty(const char* line, size_t len) { -// const char *p = line; -// const char *e = line+len; -// p = skipws(p,e); -// if(p==e) { -// return true; // whitespace only -// } -// p++; -// switch(*p) { -// case kHASH: return true; // #directive -// case kSLASH: -// p++; -// if(p==e) return false; // single slash -// switch(*p) { -// case kSLASH: // '/ /' -// case kSTAR: // '/ *' -// return true; // start of comment -// default: return false; // something else -// } -// default: return false; // something else -// } -// /*NOTREACHED*/ -// } - +/** + * Append a byte, hex encoded + * @param outstr string to append to + * @param byte the byte to append + */ void appendByte(std::string &outstr, uint8_t byte) { char tmp2[5]; @@ -109,6 +118,11 @@ void appendByte(std::string &outstr, } /** + * Append the bytes from 'linestr' into outstr, with escaping + * @param outstr the output buffer + * @param linestr the input buffer + * @param pos in/out: the current char under consideration + * @param chars the number of chars to consider * @return true on failure */ bool appendUtf8(std::string &outstr, @@ -141,6 +155,7 @@ bool appendUtf8(std::string &outstr, } /** + * Fixup u8"x" * @param linestr string to mutate. Already escaped into \u format. * @param origpos beginning, points to 'u8"' * @param pos end, points to " @@ -184,9 +199,11 @@ bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) { } /** - * fix the string at the position - * false = no err - * true = had err + * fix the u"x"/u'x'/u8"x" string at the position + * u8'x' is not supported, sorry. 
+ * @param linestr the input string + * @param pos the position + * @return false = no err, true = had err */ bool fixAt(std::string &linestr, size_t pos) { size_t origpos = pos; @@ -292,8 +309,12 @@ bool fixAt(std::string &linestr, size_t pos) { } /** + * Fixup an entire line * false = no err * true = had err + * @param no the line number (not used) + * @param linestr the string to fix + * @return true if any err, else false */ bool fixLine(int /*no*/, std::string &linestr) { const char *line = linestr.c_str(); @@ -304,17 +325,6 @@ bool fixLine(int /*no*/, std::string &linestr) { return false; // Nothing to do. No u' or u" detected } - // lines such as u8"\u0308" are all ASCII. - // // Quick Check: all ascii? - // if(!hasNonAscii(line, len)) { - // return false; // ASCII - // } - - // // comment or empty line? - // if(isCommentOrEmpty(line, len)) { - // return false; // Comment or just empty - // } - // start from the end and find all u" cases size_t pos = len = linestr.size(); while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { @@ -345,6 +355,12 @@ bool fixLine(int /*no*/, std::string &linestr) { return false; } +/** + * Convert a whole file + * @param infile + * @param outfile + * @return 1 on err, 0 otherwise + */ int convert(const std::string &infile, const std::string &outfile) { fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str()); @@ -386,6 +402,9 @@ int convert(const std::string &infile, const std::string &outfile) { return 0; } +/** + * Main function + */ int main(int argc, const char *argv[]) { prog = argv[0]; @@ -399,6 +418,3 @@ int main(int argc, const char *argv[]) { return convert(infile, outfile); } - - -#include "utf_impl.cpp" |