diff options
author | Oswald Buddenhagen <oswald.buddenhagen@qt.io> | 2017-05-30 12:48:17 +0200 |
---|---|---|
committer | Oswald Buddenhagen <oswald.buddenhagen@qt.io> | 2017-05-30 12:48:17 +0200 |
commit | 881da28418d380042aa95a97f0cbd42560a64f7c (patch) | |
tree | a794dff3274695e99c651902dde93d934ea7a5af /Source/JavaScriptCore/yarr | |
parent | 7e104c57a70fdf551bb3d22a5d637cdcbc69dbea (diff) | |
parent | 0fcedcd17cc00d3dd44c718b3cb36c1033319671 (diff) | |
download | qtwebkit-881da28418d380042aa95a97f0cbd42560a64f7c.tar.gz |
Merge 'wip/next' into dev
Change-Id: Iff9ee5e23bb326c4371ec8ed81d56f2f05d680e9
Diffstat (limited to 'Source/JavaScriptCore/yarr')
-rw-r--r-- | Source/JavaScriptCore/yarr/RegularExpression.cpp | 185 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/RegularExpression.h | 62 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/Yarr.h | 2 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp | 34 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h | 18 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.cpp | 32 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrInterpreter.h | 18 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrJIT.cpp | 93 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrJIT.h | 39 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrParser.h | 13 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.cpp | 75 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrPattern.h | 77 | ||||
-rw-r--r-- | Source/JavaScriptCore/yarr/yarr.pri | 12 |
13 files changed, 493 insertions, 167 deletions
diff --git a/Source/JavaScriptCore/yarr/RegularExpression.cpp b/Source/JavaScriptCore/yarr/RegularExpression.cpp new file mode 100644 index 000000000..0c7089654 --- /dev/null +++ b/Source/JavaScriptCore/yarr/RegularExpression.cpp @@ -0,0 +1,185 @@ + +/* + * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. + * Copyright (C) 2008 Collabora Ltd. + * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "RegularExpression.h" + +#include "Yarr.h" +#include <wtf/Assertions.h> +#include <wtf/BumpPointerAllocator.h> + +namespace JSC { namespace Yarr { + +class RegularExpression::Private : public RefCounted<RegularExpression::Private> { +public: + static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) + { + return adoptRef(*new Private(pattern, caseSensitivity, multilineMode)); + } + + int lastMatchLength; + + unsigned m_numSubpatterns; + std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode; + +private: + Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) + : lastMatchLength(-1) + , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode)) + , m_constructionError(nullptr) + { + } + + std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) + { + JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError); + if (m_constructionError) { + LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError); + return nullptr; + } + + m_numSubpatterns = pattern.m_numSubpatterns; + + return JSC::Yarr::byteCompile(pattern, &m_regexAllocator); + } + + BumpPointerAllocator m_regexAllocator; + const char* m_constructionError; +}; + +RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode) + : d(Private::create(pattern, caseSensitivity, multilineMode)) +{ +} + +RegularExpression::RegularExpression(const RegularExpression& re) + : d(re.d) +{ +} + +RegularExpression::~RegularExpression() +{ +} + +RegularExpression& RegularExpression::operator=(const RegularExpression& re) +{ + d = re.d; + return *this; +} + +int RegularExpression::match(const String& str, int startFrom, int* matchLength) const +{ + if (!d->m_regExpByteCode) + return -1; + + if (str.isNull()) + return -1; + + int offsetVectorSize = (d->m_numSubpatterns + 1) * 2; + unsigned* offsetVector; + Vector<unsigned, 32> nonReturnedOvector; + + nonReturnedOvector.resize(offsetVectorSize); + offsetVector = nonReturnedOvector.data(); + + ASSERT(offsetVector); + for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++) + offsetVector[j] = JSC::Yarr::offsetNoMatch; + + unsigned result; + if (str.length() <= INT_MAX) + result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector); + else { + // This code can't handle unsigned offsets. Limit our processing to strings with offsets that + // can be represented as ints. + result = JSC::Yarr::offsetNoMatch; + } + + if (result == JSC::Yarr::offsetNoMatch) { + d->lastMatchLength = -1; + return -1; + } + + // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector. + d->lastMatchLength = offsetVector[1] - offsetVector[0]; + if (matchLength) + *matchLength = d->lastMatchLength; + return offsetVector[0]; +} + +int RegularExpression::searchRev(const String& str) const +{ + // FIXME: This could be faster if it actually searched backwards. + // Instead, it just searches forwards, multiple times until it finds the last match. + + int start = 0; + int pos; + int lastPos = -1; + int lastMatchLength = -1; + do { + int matchLength; + pos = match(str, start, &matchLength); + if (pos >= 0) { + if (pos + matchLength > lastPos + lastMatchLength) { + // replace last match if this one is later and not a subset of the last match + lastPos = pos; + lastMatchLength = matchLength; + } + start = pos + 1; + } + } while (pos != -1); + d->lastMatchLength = lastMatchLength; + return lastPos; +} + +int RegularExpression::matchedLength() const +{ + return d->lastMatchLength; +} + +void replace(String& string, const RegularExpression& target, const String& replacement) +{ + int index = 0; + while (index < static_cast<int>(string.length())) { + int matchLength; + index = target.match(string, index, &matchLength); + if (index < 0) + break; + string.replace(index, matchLength, replacement); + index += replacement.length(); + if (!matchLength) + break; // Avoid infinite loop on 0-length matches, e.g. [a-z]* + } +} + +bool RegularExpression::isValid() const +{ + return d->m_regExpByteCode.get(); +} + +} } // namespace JSC::Yarr diff --git a/Source/JavaScriptCore/yarr/RegularExpression.h b/Source/JavaScriptCore/yarr/RegularExpression.h new file mode 100644 index 000000000..3298f0bd8 --- /dev/null +++ b/Source/JavaScriptCore/yarr/RegularExpression.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2003, 2008, 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RegularExpression_h +#define RegularExpression_h + +#include <wtf/text/WTFString.h> + +namespace JSC { namespace Yarr { + +enum MultilineMode { + MultilineDisabled, + MultilineEnabled +}; + +class JS_EXPORT_PRIVATE RegularExpression { + WTF_MAKE_FAST_ALLOCATED; +public: + RegularExpression(const String&, TextCaseSensitivity, MultilineMode = MultilineDisabled); + ~RegularExpression(); + + RegularExpression(const RegularExpression&); + RegularExpression& operator=(const RegularExpression&); + + int match(const String&, int startFrom = 0, int* matchLength = 0) const; + int searchRev(const String&) const; + + int matchedLength() const; + bool isValid() const; + +private: + class Private; + RefPtr<Private> d; +}; + +void JS_EXPORT_PRIVATE replace(String&, const RegularExpression&, const String&); + +} } // namespace JSC::Yarr + +#endif // RegularExpression_h diff --git a/Source/JavaScriptCore/yarr/Yarr.h b/Source/JavaScriptCore/yarr/Yarr.h index d393e9fa9..463623ea2 100644 --- a/Source/JavaScriptCore/yarr/Yarr.h +++ b/Source/JavaScriptCore/yarr/Yarr.h @@ -43,7 +43,7 @@ namespace JSC { namespace Yarr { #define YarrStackSpaceForBackTrackInfoParentheses 2 static const unsigned quantifyInfinite = UINT_MAX; -static const unsigned offsetNoMatch = (unsigned)-1; +static const unsigned offsetNoMatch = std::numeric_limits<unsigned>::max(); // The below limit restricts the number of "recursive" match calls in order to // avoid spending exponential time on complex regular expressions. diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp index 7bb3d08eb..52cb1a939 100644 --- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp +++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp @@ -32,24 +32,24 @@ namespace JSC { namespace Yarr { #include <stdint.h> -uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 }; -uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 }; -uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 }; -uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 }; -uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 }; -uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 }; -uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 }; -uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 }; -uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 }; -uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 }; -uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 }; -uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 }; -uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 }; -uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 }; -uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 }; +const uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 }; +const uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 }; +const uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 }; +const uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 }; +const uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 }; +const uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 }; +const uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 }; +const uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 }; +const uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 }; +const uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 }; +const uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 }; +const uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 }; +const uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 }; +const uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 }; +const uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 }; static const size_t UCS2_CANONICALIZATION_SETS = 15; -uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = { +const uint16_t* const characterSetInfo[UCS2_CANONICALIZATION_SETS] = { ucs2CharacterSet0, ucs2CharacterSet1, ucs2CharacterSet2, @@ -68,7 +68,7 @@ uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = { }; const size_t UCS2_CANONICALIZATION_RANGES = 364; -UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = { +const UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = { { 0x0000u, 0x0040u, 0x0000u, CanonicalizeUnique }, { 0x0041u, 0x005au, 0x0020u, CanonicalizeRangeLo }, { 0x005bu, 0x0060u, 0x0000u, CanonicalizeUnique }, diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h index 9dce78200..d2df70720 100644 --- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h +++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h @@ -27,7 +27,7 @@ #define YarrCanonicalizeUCS2_H #include <stdint.h> -#include <wtf/unicode/Unicode.h> +#include <unicode/utypes.h> namespace JSC { namespace Yarr { @@ -44,8 +44,8 @@ enum UCS2CanonicalizationType { }; struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; }; extern const size_t UCS2_CANONICALIZATION_RANGES; -extern uint16_t* characterSetInfo[]; -extern UCS2CanonicalizationRange rangeInfo[]; +extern const uint16_t* const characterSetInfo[]; +extern const UCS2CanonicalizationRange rangeInfo[]; // This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to // the set of Latin1 codepoints that could match. @@ -60,14 +60,14 @@ extern const size_t LATIN_CANONICALIZATION_RANGES; extern LatinCanonicalizationRange latinRangeInfo[]; // This searches in log2 time over ~364 entries, so should typically result in 8 compares. -inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch) +inline const UCS2CanonicalizationRange* rangeInfoFor(UChar ch) { - UCS2CanonicalizationRange* info = rangeInfo; + const UCS2CanonicalizationRange* info = rangeInfo; size_t entries = UCS2_CANONICALIZATION_RANGES; while (true) { size_t candidate = entries >> 1; - UCS2CanonicalizationRange* candidateInfo = info + candidate; + const UCS2CanonicalizationRange* candidateInfo = info + candidate; if (ch < candidateInfo->begin) entries = candidate; else if (ch <= candidateInfo->end) @@ -80,7 +80,7 @@ inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch) } // Should only be called for characters that have one canonically matching value. -inline UChar getCanonicalPair(UCS2CanonicalizationRange* info, UChar ch) +inline UChar getCanonicalPair(const UCS2CanonicalizationRange* info, UChar ch) { ASSERT(ch >= info->begin && ch <= info->end); switch (info->type) { @@ -108,12 +108,12 @@ inline bool isCanonicallyUnique(UChar ch) // Returns true if values are equal, under the canonicalization rules. inline bool areCanonicallyEquivalent(UChar a, UChar b) { - UCS2CanonicalizationRange* info = rangeInfoFor(a); + const UCS2CanonicalizationRange* info = rangeInfoFor(a); switch (info->type) { case CanonicalizeUnique: return a == b; case CanonicalizeSet: { - for (uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) { + for (const uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) { if (a == b) return true; } diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp index f0312ea25..99b731588 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp @@ -34,10 +34,6 @@ #include <wtf/text/CString.h> #include <wtf/text/WTFString.h> -#ifndef NDEBUG -#include <stdio.h> -#endif - using namespace WTF; namespace JSC { namespace Yarr { @@ -158,7 +154,7 @@ public: ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) { - size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); + size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t); allocatorPool = allocatorPool->ensureCapacity(size); RELEASE_ASSERT(allocatorPool); return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); @@ -711,6 +707,7 @@ public: return true; case QuantifierNonGreedy: ASSERT(backTrack->begin != notFound); + FALLTHROUGH; case QuantifierFixedCount: break; } @@ -731,6 +728,7 @@ public: context->term -= term.atom.parenthesesWidth; return false; } + FALLTHROUGH; case QuantifierNonGreedy: if (backTrack->begin == notFound) { backTrack->begin = input.getPos(); @@ -746,6 +744,7 @@ public: context->term -= term.atom.parenthesesWidth; return true; } + FALLTHROUGH; case QuantifierFixedCount: break; } @@ -1473,13 +1472,13 @@ public: m_currentAlternativeIndex = 0; } - PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) + std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator) { regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); emitDisjunction(m_pattern.m_body); regexEnd(); - return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator)); + return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator); } void checkInput(unsigned count) @@ -1510,8 +1509,11 @@ public: void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) { if (m_pattern.m_ignoreCase) { - UChar lo = Unicode::toLower(ch); - UChar hi = Unicode::toUpper(ch); + ASSERT(u_tolower(ch) <= 0xFFFF); + ASSERT(u_toupper(ch) <= 0xFFFF); + + UChar lo = u_tolower(ch); + UChar hi = u_toupper(ch); if (lo != hi) { m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); @@ -1710,7 +1712,7 @@ public: unsigned subpatternId = parenthesesBegin.atom.subpatternId; unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; - OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); + auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); unsigned firstTermInParentheses = beginTerm + 1; parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2); @@ -1723,7 +1725,7 @@ public: m_bodyDisjunction->terms.shrink(beginTerm); m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition)); - m_allParenthesesInfo.append(parenthesesDisjunction.release()); + m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction)); m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; @@ -1776,7 +1778,7 @@ public: void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) { - m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); + m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize); m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); m_bodyDisjunction->terms[0].frameLocation = 0; m_currentAlternativeIndex = 0; @@ -1918,13 +1920,13 @@ public: private: YarrPattern& m_pattern; - OwnPtr<ByteDisjunction> m_bodyDisjunction; + std::unique_ptr<ByteDisjunction> m_bodyDisjunction; unsigned m_currentAlternativeIndex; Vector<ParenthesesStackEntry> m_parenthesesStack; - Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo; + Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; }; -PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) +std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) { return ByteCompiler(pattern).compile(allocator); } diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.h b/Source/JavaScriptCore/yarr/YarrInterpreter.h index bb64e6d91..dc2f3f77b 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.h +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.h @@ -27,8 +27,6 @@ #define YarrInterpreter_h #include "YarrPattern.h" -#include <wtf/PassOwnPtr.h> -#include <wtf/unicode/Unicode.h> namespace WTF { class BumpPointerAllocator; @@ -329,6 +327,8 @@ public: { } + size_t estimatedSizeInBytes() const { return terms.capacity() * sizeof(ByteTerm); } + Vector<ByteTerm> terms; unsigned m_numSubpatterns; unsigned m_frameSize; @@ -337,8 +337,8 @@ public: struct BytecodePattern { WTF_MAKE_FAST_ALLOCATED; public: - BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction> >& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) - : m_body(body) + BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator) + : m_body(WTFMove(body)) , m_ignoreCase(pattern.m_ignoreCase) , m_multiline(pattern.m_multiline) , m_allocator(allocator) @@ -355,7 +355,9 @@ public: m_userCharacterClasses.shrinkToFit(); } - OwnPtr<ByteDisjunction> m_body; + size_t estimatedSizeInBytes() const { return m_body->estimatedSizeInBytes(); } + + std::unique_ptr<ByteDisjunction> m_body; bool m_ignoreCase; bool m_multiline; // Each BytecodePattern is associated with a RegExp, each RegExp is associated @@ -366,11 +368,11 @@ public: CharacterClass* wordcharCharacterClass; private: - Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo; - Vector<OwnPtr<CharacterClass> > m_userCharacterClasses; + Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo; + Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; }; -JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); +JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); diff --git a/Source/JavaScriptCore/yarr/YarrJIT.cpp b/Source/JavaScriptCore/yarr/YarrJIT.cpp index d337cf797..89e2888a0 100644 --- a/Source/JavaScriptCore/yarr/YarrJIT.cpp +++ b/Source/JavaScriptCore/yarr/YarrJIT.cpp @@ -53,6 +53,17 @@ class YarrGenerator : private MacroAssembler { static const RegisterID returnRegister = ARMRegisters::r0; static const RegisterID returnRegister2 = ARMRegisters::r1; +#elif CPU(ARM64) + static const RegisterID input = ARM64Registers::x0; + static const RegisterID index = ARM64Registers::x1; + static const RegisterID length = ARM64Registers::x2; + static const RegisterID output = ARM64Registers::x3; + + static const RegisterID regT0 = ARM64Registers::x4; + static const RegisterID regT1 = ARM64Registers::x5; + + static const RegisterID returnRegister = ARM64Registers::x0; + static const RegisterID returnRegister2 = ARM64Registers::x1; #elif CPU(MIPS) static const RegisterID input = MIPSRegisters::a0; static const RegisterID index = MIPSRegisters::a1; @@ -322,17 +333,27 @@ class YarrGenerator : private MacroAssembler { jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); } + unsigned alignCallFrameSizeInBytes(unsigned callFrameSize) + { + callFrameSize *= sizeof(void*); + if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize) + CRASH(); + callFrameSize = (callFrameSize + 0x3f) & ~0x3f; + if (!callFrameSize) + CRASH(); + return callFrameSize; + } void initCallFrame() { unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; if (callFrameSize) - subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); + subPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister); } void removeCallFrame() { unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; if (callFrameSize) - addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); + addPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister); } // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns. @@ -1605,8 +1626,7 @@ class YarrGenerator : private MacroAssembler { if (term->quantityType == QuantifierFixedCount) inputOffset -= term->parentheses.disjunction->m_minimumSize; if (inputOffset) { - move(index, indexTemporary); - add32(Imm32(inputOffset), indexTemporary); + add32(Imm32(inputOffset), index, indexTemporary); setSubpatternStart(indexTemporary, term->parentheses.subpatternId); } else setSubpatternStart(index, term->parentheses.subpatternId); @@ -1618,16 +1638,14 @@ class YarrGenerator : private MacroAssembler { const RegisterID indexTemporary = regT0; ASSERT(term->quantityCount == 1); -#ifndef NDEBUG // Runtime ASSERT to make sure that the nested alternative handled the // "no input consumed" check. - if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { + if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { Jump pastBreakpoint; pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); - breakpoint(); + abortWithReason(YARRNoInputConsumed); pastBreakpoint.link(this); } -#endif // If the parenthese are capturing, store the ending index value to the // captures array, offsetting as necessary. @@ -1638,8 +1656,7 @@ class YarrGenerator : private MacroAssembler { if (term->capture() && compileMode == IncludeSubpatterns) { int inputOffset = term->inputPosition - m_checked; if (inputOffset) { - move(index, indexTemporary); - add32(Imm32(inputOffset), indexTemporary); + add32(Imm32(inputOffset), index, indexTemporary); setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); } else setSubpatternEnd(index, term->parentheses.subpatternId); @@ -1674,16 +1691,16 @@ class YarrGenerator : private MacroAssembler { } case OpParenthesesSubpatternTerminalEnd: { YarrOp& beginOp = m_ops[op.m_previousOp]; -#ifndef NDEBUG - PatternTerm* term = op.m_term; - - // Runtime ASSERT to make sure that the nested alternative handled the - // "no input consumed" check. - Jump pastBreakpoint; - pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); - breakpoint(); - pastBreakpoint.link(this); -#endif + if (!ASSERT_DISABLED) { + PatternTerm* term = op.m_term; + + // Runtime ASSERT to make sure that the nested alternative handled the + // "no input consumed" check. + Jump pastBreakpoint; + pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); + abortWithReason(YARRNoInputConsumed); + pastBreakpoint.link(this); + } // We know that the match is non-zero, we can accept it and // loop back up to the head of the subpattern. @@ -2325,7 +2342,7 @@ class YarrGenerator : private MacroAssembler { m_ops.append(alternativeBeginOpCode); m_ops.last().m_previousOp = notFound; m_ops.last().m_term = term; - Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; for (unsigned i = 0; i < alternatives.size(); ++i) { size_t lastOpIndex = m_ops.size() - 1; @@ -2376,7 +2393,7 @@ class YarrGenerator : private MacroAssembler { m_ops.append(OpSimpleNestedAlternativeBegin); m_ops.last().m_previousOp = notFound; m_ops.last().m_term = term; - Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives; for (unsigned i = 0; i < alternatives.size(); ++i) { size_t lastOpIndex = m_ops.size() - 1; @@ -2450,7 +2467,7 @@ class YarrGenerator : private MacroAssembler { // to return the failing result. void opCompileBody(PatternDisjunction* disjunction) { - Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives; size_t currentAlternativeIndex = 0; // Emit the 'once through' alternatives. @@ -2549,6 +2566,10 @@ class YarrGenerator : private MacroAssembler { if (compileMode == IncludeSubpatterns) loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); #endif +#elif CPU(ARM64) + // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. + zeroExtend32ToPtr(index, index); + zeroExtend32ToPtr(length, length); #elif CPU(ARM) push(ARMRegisters::r4); push(ARMRegisters::r5); @@ -2559,10 +2580,14 @@ class YarrGenerator : private MacroAssembler { #elif CPU(MIPS) // Do nothing. #endif + + store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT); } void generateReturn() { + store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT); + #if CPU(X86_64) #if OS(WINDOWS) // Store the return value in the allocated space pointed by rcx. @@ -2591,8 +2616,9 @@ class YarrGenerator : private MacroAssembler { } public: - YarrGenerator(YarrPattern& pattern, YarrCharSize charSize) - : m_pattern(pattern) + YarrGenerator(VM* vm, YarrPattern& pattern, YarrCharSize charSize) + : m_vm(vm) + , m_pattern(pattern) , m_charSize(charSize) , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo) , m_shouldFallBack(false) @@ -2620,11 +2646,8 @@ public: initCallFrame(); - // Compile the pattern to the internal 'YarrOp' representation. opCompileBody(m_pattern.m_body); - // If we encountered anything we can't handle in the JIT code - // (e.g. backreferences) then return early. if (m_shouldFallBack) { jitObject.setFallBack(true); return; @@ -2633,8 +2656,12 @@ public: generate(); backtrack(); - // Link & finalize the code. - LinkBuffer linkBuffer(*vm, this, REGEXP_CODE_ID); + LinkBuffer linkBuffer(*vm, *this, REGEXP_CODE_ID, JITCompilationCanFail); + if (linkBuffer.didFailToAllocate()) { + jitObject.setFallBack(true); + return; + } + m_backtrackingState.linkDataLabels(linkBuffer); if (compileMode == MatchOnly) { @@ -2652,6 +2679,8 @@ public: } private: + VM* m_vm; + YarrPattern& m_pattern; YarrCharSize m_charSize; @@ -2684,9 +2713,9 @@ private: void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode) { if (mode == MatchOnly) - YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject); + YarrGenerator<MatchOnly>(vm, pattern, charSize).compile(vm, jitObject); else - YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject); + YarrGenerator<IncludeSubpatterns>(vm, pattern, charSize).compile(vm, jitObject); } }} diff --git a/Source/JavaScriptCore/yarr/YarrJIT.h b/Source/JavaScriptCore/yarr/YarrJIT.h index b7ce7d38f..4d867607f 100644 --- a/Source/JavaScriptCore/yarr/YarrJIT.h +++ b/Source/JavaScriptCore/yarr/YarrJIT.h @@ -48,7 +48,7 @@ class ExecutablePool; namespace Yarr { class YarrCodeBlock { -#if CPU(X86_64) +#if CPU(X86_64) || CPU(ARM64) typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL; typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL; typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length) YARR_CALL; @@ -108,9 +108,44 @@ public: } #if ENABLE(REGEXP_TRACING) - void *getAddr() { return m_ref.code().executableAddress(); } + void *get8BitMatchOnlyAddr() + { + if (!has8BitCodeMatchOnly()) + return 0; + + return m_matchOnly8.code().executableAddress(); + } + + void *get16BitMatchOnlyAddr() + { + if (!has16BitCodeMatchOnly()) + return 0; + + return m_matchOnly16.code().executableAddress(); + } + + void *get8BitMatchAddr() + { + if (!has8BitCode()) + return 0; + + return m_ref8.code().executableAddress(); + } + + void *get16BitMatchAddr() + { + if (!has16BitCode()) + return 0; + + return m_ref16.code().executableAddress(); + } #endif + size_t size() const + { + return m_ref8.size() + m_ref16.size() + m_matchOnly8.size() + m_matchOnly16.size(); + } + void clear() { m_ref8 = MacroAssemblerCodeRef(); diff --git a/Source/JavaScriptCore/yarr/YarrParser.h b/Source/JavaScriptCore/yarr/YarrParser.h index 8c5d71b5f..761acb557 100644 --- a/Source/JavaScriptCore/yarr/YarrParser.h +++ b/Source/JavaScriptCore/yarr/YarrParser.h @@ -29,7 +29,6 @@ #include "Yarr.h" #include <wtf/ASCIICType.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { @@ -119,6 +118,7 @@ private: return; } // Otherwise just fall through - cached character so treat this as Empty. + FALLTHROUGH; case Empty: m_character = ch; @@ -168,7 +168,7 @@ private: case CachedCharacter: // Flush the currently cached character, then fall through. m_delegate.atomCharacterClassAtom(m_character); - + FALLTHROUGH; case Empty: case AfterCharacterClass: m_state = AfterCharacterClass; @@ -186,7 +186,7 @@ private: case CachedCharacterHyphen: m_delegate.atomCharacterClassAtom(m_character); m_delegate.atomCharacterClassAtom('-'); - // fall through + FALLTHROUGH; case AfterCharacterClassHyphen: m_delegate.atomCharacterClassBuiltIn(classID, invert); m_state = Empty; @@ -232,7 +232,7 @@ private: : m_delegate(delegate) , m_backReferenceLimit(backReferenceLimit) , m_err(NoError) - , m_data(pattern.getCharacters<CharType>()) + , m_data(pattern.characters<CharType>()) , m_size(pattern.length()) , m_index(0) , m_parenthesesNestingDepth(0) @@ -349,6 +349,7 @@ private: } // Fall-through to handle this as an octal escape. + FALLTHROUGH; } // Octal escape @@ -656,7 +657,9 @@ private: } restoreState(state); - } // if we did not find a complete quantifer, fall through to the default case. + } + // if we did not find a complete quantifer, fall through to the default case. + FALLTHROUGH; default: m_delegate.atomPatternCharacter(consume()); diff --git a/Source/JavaScriptCore/yarr/YarrPattern.cpp b/Source/JavaScriptCore/yarr/YarrPattern.cpp index 3ce0216e5..00339b755 100644 --- a/Source/JavaScriptCore/yarr/YarrPattern.cpp +++ b/Source/JavaScriptCore/yarr/YarrPattern.cpp @@ -84,21 +84,21 @@ public: } // Add multiple matches, if necessary. - UCS2CanonicalizationRange* info = rangeInfoFor(ch); + const UCS2CanonicalizationRange* info = rangeInfoFor(ch); if (info->type == CanonicalizeUnique) addSorted(m_matchesUnicode, ch); else putUnicodeIgnoreCase(ch, info); } - void putUnicodeIgnoreCase(UChar ch, UCS2CanonicalizationRange* info) + void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info) { ASSERT(m_isCaseInsensitive); ASSERT(ch > 0x7f); ASSERT(ch >= info->begin && ch <= info->end); ASSERT(info->type != CanonicalizeUnique); if (info->type == CanonicalizeSet) { - for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set) + for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set) addSorted(m_matchesUnicode, ch); } else { addSorted(m_matchesUnicode, ch); @@ -129,7 +129,7 @@ public: if (!m_isCaseInsensitive) return; - UCS2CanonicalizationRange* info = rangeInfoFor(lo); + const UCS2CanonicalizationRange* info = rangeInfoFor(lo); while (true) { // Handle the range [lo .. end] UChar end = std::min<UChar>(info->end, hi); @@ -140,7 +140,7 @@ public: break; case CanonicalizeSet: { UChar ch; - for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set) + for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set) addSorted(m_matchesUnicode, ch); break; } @@ -175,16 +175,16 @@ public: } - PassOwnPtr<CharacterClass> charClass() + std::unique_ptr<CharacterClass> charClass() { - OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass); + auto characterClass = std::make_unique<CharacterClass>(); characterClass->m_matches.swap(m_matches); characterClass->m_ranges.swap(m_ranges); characterClass->m_matchesUnicode.swap(m_matchesUnicode); characterClass->m_rangesUnicode.swap(m_rangesUnicode); - return characterClass.release(); + return characterClass; } private: @@ -274,10 +274,10 @@ public: , m_characterClassConstructor(pattern.m_ignoreCase) , m_invertParentheticalAssertion(false) { - OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction); + auto body = std::make_unique<PatternDisjunction>(); m_pattern.m_body = body.get(); m_alternative = body->addNewAlternative(); - m_pattern.m_disjunctions.append(body.release()); + m_pattern.m_disjunctions.append(WTFMove(body)); } ~YarrPatternConstructor() @@ -289,15 +289,15 @@ public: m_pattern.reset(); m_characterClassConstructor.reset(); - OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction); + auto body = std::make_unique<PatternDisjunction>(); m_pattern.m_body = body.get(); m_alternative = body->addNewAlternative(); - m_pattern.m_disjunctions.append(body.release()); + m_pattern.m_disjunctions.append(WTFMove(body)); } void assertionBOL() { - if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) { + if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) { m_alternative->m_startsWithBOL = true; m_alternative->m_containsBOL = true; m_pattern.m_containsBOL = true; @@ -322,16 +322,16 @@ public: return; } - UCS2CanonicalizationRange* info = rangeInfoFor(ch); + const UCS2CanonicalizationRange* info = rangeInfoFor(ch); if (info->type == CanonicalizeUnique) { m_alternative->m_terms.append(PatternTerm(ch)); return; } m_characterClassConstructor.putUnicodeIgnoreCase(ch, info); - OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass(); + auto newCharacterClass = m_characterClassConstructor.charClass(); m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false)); - m_pattern.m_userCharacterClasses.append(newCharacterClass.release()); + m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass)); } void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) @@ -391,9 +391,9 @@ public: void atomCharacterClassEnd() { - OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass(); + auto newCharacterClass = m_characterClassConstructor.charClass(); m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass)); - m_pattern.m_userCharacterClasses.append(newCharacterClass.release()); + m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass)); } void atomParenthesesSubpatternBegin(bool capture = true) @@ -402,19 +402,19 @@ public: if (capture) m_pattern.m_numSubpatterns++; - OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative)); + auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative); m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false)); m_alternative = parenthesesDisjunction->addNewAlternative(); - m_pattern.m_disjunctions.append(parenthesesDisjunction.release()); + m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction)); } void atomParentheticalAssertionBegin(bool invert = false) { - OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative)); + auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative); m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert)); m_alternative = parenthesesDisjunction->addNewAlternative(); m_invertParentheticalAssertion = invert; - m_pattern.m_disjunctions.append(parenthesesDisjunction.release()); + m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction)); } void atomParenthesesEnd() @@ -479,12 +479,12 @@ public: // skip alternatives with m_startsWithBOL set true. PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false) { - OwnPtr<PatternDisjunction> newDisjunction; + std::unique_ptr<PatternDisjunction> newDisjunction; for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { PatternAlternative* alternative = disjunction->m_alternatives[alt].get(); if (!filterStartsWithBOL || !alternative->m_startsWithBOL) { if (!newDisjunction) { - newDisjunction = adoptPtr(new PatternDisjunction()); + newDisjunction = std::make_unique<PatternDisjunction>(); newDisjunction->m_parent = disjunction->m_parent; } PatternAlternative* newAlternative = newDisjunction->addNewAlternative(); @@ -498,7 +498,7 @@ public: return 0; PatternDisjunction* copiedDisjunction = newDisjunction.get(); - m_pattern.m_disjunctions.append(newDisjunction.release()); + m_pattern.m_disjunctions.append(WTFMove(newDisjunction)); return copiedDisjunction; } @@ -666,6 +666,8 @@ public: minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize); maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize); hasFixedSize &= alternative->m_hasFixedSize; + if (alternative->m_minimumSize > INT_MAX) + m_pattern.m_containsUnsignedLengthPattern = true; } ASSERT(minimumInputSize != UINT_MAX); @@ -696,7 +698,7 @@ public: if (m_pattern.m_numSubpatterns) return; - Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; for (size_t i = 0; i < alternatives.size(); ++i) { Vector<PatternTerm>& terms = alternatives[i]->m_terms; if (terms.size()) { @@ -737,11 +739,12 @@ public: } } - bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex) + bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t endIndex) { Vector<PatternTerm>& terms = alternative->m_terms; - for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) { + ASSERT(endIndex <= terms.size()); + for (size_t termIndex = firstTermIndex; termIndex < endIndex; ++termIndex) { PatternTerm& term = terms[termIndex]; if (term.m_capture) @@ -750,7 +753,7 @@ public: if (term.type == PatternTerm::TypeParenthesesSubpattern) { PatternDisjunction* nestedDisjunction = term.parentheses.disjunction; for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) { - if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1)) + if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size())) return true; } } @@ -766,7 +769,7 @@ public: // beginning and the end of the match. void optimizeDotStarWrappedExpressions() { - Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives; + Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives; if (alternatives.size() != 1) return; @@ -775,7 +778,7 @@ public: if (terms.size() >= 3) { bool startsWithBOL = false; bool endsWithEOL = false; - size_t termIndex, firstExpressionTerm, lastExpressionTerm; + size_t termIndex, firstExpressionTerm; termIndex = 0; if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) { @@ -798,14 +801,13 @@ public: PatternTerm& lastNonAnchorTerm = terms[termIndex]; if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy)) return; - - lastExpressionTerm = termIndex - 1; - if (firstExpressionTerm > lastExpressionTerm) + size_t endIndex = termIndex; + if (firstExpressionTerm >= endIndex) return; - if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) { - for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex) + if (!containsCapturingTerms(alternative, firstExpressionTerm, endIndex)) { + for (termIndex = terms.size() - 1; termIndex >= endIndex; --termIndex) terms.remove(termIndex); for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex) @@ -864,6 +866,7 @@ YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, , m_multiline(multiline) , m_containsBackreferences(false) , m_containsBOL(false) + , m_containsUnsignedLengthPattern(false) , m_numSubpatterns(0) , m_maxBackReference(0) , newlineCached(0) diff --git a/Source/JavaScriptCore/yarr/YarrPattern.h b/Source/JavaScriptCore/yarr/YarrPattern.h index e7d187c2b..5482de5af 100644 --- a/Source/JavaScriptCore/yarr/YarrPattern.h +++ b/Source/JavaScriptCore/yarr/YarrPattern.h @@ -28,12 +28,9 @@ #define YarrPattern_h #include <wtf/CheckedArithmetic.h> -#include <wtf/OwnPtr.h> -#include <wtf/PassOwnPtr.h> #include <wtf/RefCounted.h> #include <wtf/Vector.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace JSC { namespace Yarr { @@ -270,12 +267,11 @@ public: PatternAlternative* addNewAlternative() { - PatternAlternative* alternative = new PatternAlternative(this); - m_alternatives.append(adoptPtr(alternative)); - return alternative; + m_alternatives.append(std::make_unique<PatternAlternative>(this)); + return static_cast<PatternAlternative*>(m_alternatives.last().get()); } - Vector<OwnPtr<PatternAlternative> > m_alternatives; + Vector<std::unique_ptr<PatternAlternative>> m_alternatives; PatternAlternative* m_parent; unsigned m_minimumSize; unsigned m_callFrameSize; @@ -286,13 +282,13 @@ public: // (please to be calling newlineCharacterClass() et al on your // friendly neighborhood YarrPattern instance to get nicely // cached copies). -CharacterClass* newlineCreate(); -CharacterClass* digitsCreate(); -CharacterClass* spacesCreate(); -CharacterClass* wordcharCreate(); -CharacterClass* nondigitsCreate(); -CharacterClass* nonspacesCreate(); -CharacterClass* nonwordcharCreate(); +std::unique_ptr<CharacterClass> newlineCreate(); +std::unique_ptr<CharacterClass> digitsCreate(); +std::unique_ptr<CharacterClass> spacesCreate(); +std::unique_ptr<CharacterClass> wordcharCreate(); +std::unique_ptr<CharacterClass> nondigitsCreate(); +std::unique_ptr<CharacterClass> nonspacesCreate(); +std::unique_ptr<CharacterClass> nonwordcharCreate(); struct TermChain { TermChain(PatternTerm term) @@ -313,6 +309,7 @@ struct YarrPattern { m_containsBackreferences = false; m_containsBOL = false; + m_containsUnsignedLengthPattern = false; newlineCached = 0; digitsCached = 0; @@ -331,46 +328,65 @@ struct YarrPattern { return m_maxBackReference > m_numSubpatterns; } + bool containsUnsignedLengthPattern() + { + return m_containsUnsignedLengthPattern; + } + CharacterClass* newlineCharacterClass() { - if (!newlineCached) - m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate())); + if (!newlineCached) { + m_userCharacterClasses.append(newlineCreate()); + newlineCached = m_userCharacterClasses.last().get(); + } return newlineCached; } CharacterClass* digitsCharacterClass() { - if (!digitsCached) - m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate())); + if (!digitsCached) { + m_userCharacterClasses.append(digitsCreate()); + digitsCached = m_userCharacterClasses.last().get(); + } return digitsCached; } CharacterClass* spacesCharacterClass() { - if (!spacesCached) - m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate())); + if (!spacesCached) { + m_userCharacterClasses.append(spacesCreate()); + spacesCached = m_userCharacterClasses.last().get(); + } return spacesCached; } CharacterClass* wordcharCharacterClass() { - if (!wordcharCached) - m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate())); + if (!wordcharCached) { + m_userCharacterClasses.append(wordcharCreate()); + wordcharCached = m_userCharacterClasses.last().get(); + } return wordcharCached; } CharacterClass* nondigitsCharacterClass() { - if (!nondigitsCached) - m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate())); + if (!nondigitsCached) { + m_userCharacterClasses.append(nondigitsCreate()); + nondigitsCached = m_userCharacterClasses.last().get(); + } return nondigitsCached; } CharacterClass* nonspacesCharacterClass() { - if (!nonspacesCached) - m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate())); + if (!nonspacesCached) { + m_userCharacterClasses.append(nonspacesCreate()); + nonspacesCached = m_userCharacterClasses.last().get(); + } return nonspacesCached; } CharacterClass* nonwordcharCharacterClass() { - if (!nonwordcharCached) - m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate())); + if (!nonwordcharCached) { + m_userCharacterClasses.append(nonwordcharCreate()); + nonwordcharCached = m_userCharacterClasses.last().get(); + } return nonwordcharCached; } @@ -378,11 +394,12 @@ struct YarrPattern { bool m_multiline : 1; bool m_containsBackreferences : 1; bool m_containsBOL : 1; + bool m_containsUnsignedLengthPattern : 1; unsigned m_numSubpatterns; unsigned m_maxBackReference; PatternDisjunction* m_body; - Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions; - Vector<OwnPtr<CharacterClass> > m_userCharacterClasses; + Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions; + Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses; private: const char* compile(const String& patternString); diff --git a/Source/JavaScriptCore/yarr/yarr.pri b/Source/JavaScriptCore/yarr/yarr.pri deleted file mode 100644 index 7e9b4d3f3..000000000 --- a/Source/JavaScriptCore/yarr/yarr.pri +++ /dev/null @@ -1,12 +0,0 @@ -# ------------------------------------------------------------------- -# Project file for YARR -# -# See 'Tools/qmake/README' for an overview of the build system -# ------------------------------------------------------------------- - -SOURCES += \ - $$PWD/YarrInterpreter.cpp \ - $$PWD/YarrPattern.cpp \ - $$PWD/YarrSyntaxChecker.cpp \ - $$PWD/YarrCanonicalizeUCS2.cpp - |