summaryrefslogtreecommitdiff
path: root/Source/JavaScriptCore/yarr
diff options
context:
space:
mode:
authorOswald Buddenhagen <oswald.buddenhagen@qt.io>2017-05-30 12:48:17 +0200
committerOswald Buddenhagen <oswald.buddenhagen@qt.io>2017-05-30 12:48:17 +0200
commit881da28418d380042aa95a97f0cbd42560a64f7c (patch)
treea794dff3274695e99c651902dde93d934ea7a5af /Source/JavaScriptCore/yarr
parent7e104c57a70fdf551bb3d22a5d637cdcbc69dbea (diff)
parent0fcedcd17cc00d3dd44c718b3cb36c1033319671 (diff)
downloadqtwebkit-881da28418d380042aa95a97f0cbd42560a64f7c.tar.gz
Merge 'wip/next' into dev
Change-Id: Iff9ee5e23bb326c4371ec8ed81d56f2f05d680e9
Diffstat (limited to 'Source/JavaScriptCore/yarr')
-rw-r--r--Source/JavaScriptCore/yarr/RegularExpression.cpp185
-rw-r--r--Source/JavaScriptCore/yarr/RegularExpression.h62
-rw-r--r--Source/JavaScriptCore/yarr/Yarr.h2
-rw-r--r--Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp34
-rw-r--r--Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h18
-rw-r--r--Source/JavaScriptCore/yarr/YarrInterpreter.cpp32
-rw-r--r--Source/JavaScriptCore/yarr/YarrInterpreter.h18
-rw-r--r--Source/JavaScriptCore/yarr/YarrJIT.cpp93
-rw-r--r--Source/JavaScriptCore/yarr/YarrJIT.h39
-rw-r--r--Source/JavaScriptCore/yarr/YarrParser.h13
-rw-r--r--Source/JavaScriptCore/yarr/YarrPattern.cpp75
-rw-r--r--Source/JavaScriptCore/yarr/YarrPattern.h77
-rw-r--r--Source/JavaScriptCore/yarr/yarr.pri12
13 files changed, 493 insertions, 167 deletions
diff --git a/Source/JavaScriptCore/yarr/RegularExpression.cpp b/Source/JavaScriptCore/yarr/RegularExpression.cpp
new file mode 100644
index 000000000..0c7089654
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/RegularExpression.cpp
@@ -0,0 +1,185 @@
+
+/*
+ * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Collabora Ltd.
+ * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegularExpression.h"
+
+#include "Yarr.h"
+#include <wtf/Assertions.h>
+#include <wtf/BumpPointerAllocator.h>
+
+namespace JSC { namespace Yarr {
+
+class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
+public:
+ static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ {
+ return adoptRef(*new Private(pattern, caseSensitivity, multilineMode));
+ }
+
+ int lastMatchLength;
+
+ unsigned m_numSubpatterns;
+ std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
+
+private:
+ Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ : lastMatchLength(-1)
+ , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode))
+ , m_constructionError(nullptr)
+ {
+ }
+
+ std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ {
+ JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError);
+ if (m_constructionError) {
+ LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
+ return nullptr;
+ }
+
+ m_numSubpatterns = pattern.m_numSubpatterns;
+
+ return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
+ }
+
+ BumpPointerAllocator m_regexAllocator;
+ const char* m_constructionError;
+};
+
+RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
+ : d(Private::create(pattern, caseSensitivity, multilineMode))
+{
+}
+
+RegularExpression::RegularExpression(const RegularExpression& re)
+ : d(re.d)
+{
+}
+
+RegularExpression::~RegularExpression()
+{
+}
+
+RegularExpression& RegularExpression::operator=(const RegularExpression& re)
+{
+ d = re.d;
+ return *this;
+}
+
+int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
+{
+ if (!d->m_regExpByteCode)
+ return -1;
+
+ if (str.isNull())
+ return -1;
+
+ int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
+ unsigned* offsetVector;
+ Vector<unsigned, 32> nonReturnedOvector;
+
+ nonReturnedOvector.resize(offsetVectorSize);
+ offsetVector = nonReturnedOvector.data();
+
+ ASSERT(offsetVector);
+ for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
+ offsetVector[j] = JSC::Yarr::offsetNoMatch;
+
+ unsigned result;
+ if (str.length() <= INT_MAX)
+ result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
+ else {
+ // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
+ // can be represented as ints.
+ result = JSC::Yarr::offsetNoMatch;
+ }
+
+ if (result == JSC::Yarr::offsetNoMatch) {
+ d->lastMatchLength = -1;
+ return -1;
+ }
+
+ // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
+ d->lastMatchLength = offsetVector[1] - offsetVector[0];
+ if (matchLength)
+ *matchLength = d->lastMatchLength;
+ return offsetVector[0];
+}
+
+int RegularExpression::searchRev(const String& str) const
+{
+ // FIXME: This could be faster if it actually searched backwards.
+ // Instead, it just searches forwards, multiple times until it finds the last match.
+
+ int start = 0;
+ int pos;
+ int lastPos = -1;
+ int lastMatchLength = -1;
+ do {
+ int matchLength;
+ pos = match(str, start, &matchLength);
+ if (pos >= 0) {
+ if (pos + matchLength > lastPos + lastMatchLength) {
+ // replace last match if this one is later and not a subset of the last match
+ lastPos = pos;
+ lastMatchLength = matchLength;
+ }
+ start = pos + 1;
+ }
+ } while (pos != -1);
+ d->lastMatchLength = lastMatchLength;
+ return lastPos;
+}
+
+int RegularExpression::matchedLength() const
+{
+ return d->lastMatchLength;
+}
+
+void replace(String& string, const RegularExpression& target, const String& replacement)
+{
+ int index = 0;
+ while (index < static_cast<int>(string.length())) {
+ int matchLength;
+ index = target.match(string, index, &matchLength);
+ if (index < 0)
+ break;
+ string.replace(index, matchLength, replacement);
+ index += replacement.length();
+ if (!matchLength)
+ break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
+ }
+}
+
+bool RegularExpression::isValid() const
+{
+ return d->m_regExpByteCode.get();
+}
+
+} } // namespace JSC::Yarr
diff --git a/Source/JavaScriptCore/yarr/RegularExpression.h b/Source/JavaScriptCore/yarr/RegularExpression.h
new file mode 100644
index 000000000..3298f0bd8
--- /dev/null
+++ b/Source/JavaScriptCore/yarr/RegularExpression.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2003, 2008, 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegularExpression_h
+#define RegularExpression_h
+
+#include <wtf/text/WTFString.h>
+
+namespace JSC { namespace Yarr {
+
+enum MultilineMode {
+ MultilineDisabled,
+ MultilineEnabled
+};
+
+class JS_EXPORT_PRIVATE RegularExpression {
+ WTF_MAKE_FAST_ALLOCATED;
+public:
+ RegularExpression(const String&, TextCaseSensitivity, MultilineMode = MultilineDisabled);
+ ~RegularExpression();
+
+ RegularExpression(const RegularExpression&);
+ RegularExpression& operator=(const RegularExpression&);
+
+ int match(const String&, int startFrom = 0, int* matchLength = 0) const;
+ int searchRev(const String&) const;
+
+ int matchedLength() const;
+ bool isValid() const;
+
+private:
+ class Private;
+ RefPtr<Private> d;
+};
+
+void JS_EXPORT_PRIVATE replace(String&, const RegularExpression&, const String&);
+
+} } // namespace JSC::Yarr
+
+#endif // RegularExpression_h
diff --git a/Source/JavaScriptCore/yarr/Yarr.h b/Source/JavaScriptCore/yarr/Yarr.h
index d393e9fa9..463623ea2 100644
--- a/Source/JavaScriptCore/yarr/Yarr.h
+++ b/Source/JavaScriptCore/yarr/Yarr.h
@@ -43,7 +43,7 @@ namespace JSC { namespace Yarr {
#define YarrStackSpaceForBackTrackInfoParentheses 2
static const unsigned quantifyInfinite = UINT_MAX;
-static const unsigned offsetNoMatch = (unsigned)-1;
+static const unsigned offsetNoMatch = std::numeric_limits<unsigned>::max();
// The below limit restricts the number of "recursive" match calls in order to
// avoid spending exponential time on complex regular expressions.
diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp
index 7bb3d08eb..52cb1a939 100644
--- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp
+++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp
@@ -32,24 +32,24 @@ namespace JSC { namespace Yarr {
#include <stdint.h>
-uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 };
-uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 };
-uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 };
-uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 };
-uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 };
-uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 };
-uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 };
-uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 };
-uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 };
-uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 };
-uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 };
-uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 };
-uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 };
-uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 };
-uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 };
+const uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 };
+const uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 };
+const uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 };
+const uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 };
+const uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 };
+const uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 };
+const uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 };
+const uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 };
+const uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 };
+const uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 };
+const uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 };
+const uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 };
+const uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 };
+const uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 };
+const uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 };
static const size_t UCS2_CANONICALIZATION_SETS = 15;
-uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {
+const uint16_t* const characterSetInfo[UCS2_CANONICALIZATION_SETS] = {
ucs2CharacterSet0,
ucs2CharacterSet1,
ucs2CharacterSet2,
@@ -68,7 +68,7 @@ uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {
};
const size_t UCS2_CANONICALIZATION_RANGES = 364;
-UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {
+const UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x0000u, 0x0040u, 0x0000u, CanonicalizeUnique },
{ 0x0041u, 0x005au, 0x0020u, CanonicalizeRangeLo },
{ 0x005bu, 0x0060u, 0x0000u, CanonicalizeUnique },
diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h
index 9dce78200..d2df70720 100644
--- a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h
+++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h
@@ -27,7 +27,7 @@
#define YarrCanonicalizeUCS2_H
#include <stdint.h>
-#include <wtf/unicode/Unicode.h>
+#include <unicode/utypes.h>
namespace JSC { namespace Yarr {
@@ -44,8 +44,8 @@ enum UCS2CanonicalizationType {
};
struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; };
extern const size_t UCS2_CANONICALIZATION_RANGES;
-extern uint16_t* characterSetInfo[];
-extern UCS2CanonicalizationRange rangeInfo[];
+extern const uint16_t* const characterSetInfo[];
+extern const UCS2CanonicalizationRange rangeInfo[];
// This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to
// the set of Latin1 codepoints that could match.
@@ -60,14 +60,14 @@ extern const size_t LATIN_CANONICALIZATION_RANGES;
extern LatinCanonicalizationRange latinRangeInfo[];
// This searches in log2 time over ~364 entries, so should typically result in 8 compares.
-inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch)
+inline const UCS2CanonicalizationRange* rangeInfoFor(UChar ch)
{
- UCS2CanonicalizationRange* info = rangeInfo;
+ const UCS2CanonicalizationRange* info = rangeInfo;
size_t entries = UCS2_CANONICALIZATION_RANGES;
while (true) {
size_t candidate = entries >> 1;
- UCS2CanonicalizationRange* candidateInfo = info + candidate;
+ const UCS2CanonicalizationRange* candidateInfo = info + candidate;
if (ch < candidateInfo->begin)
entries = candidate;
else if (ch <= candidateInfo->end)
@@ -80,7 +80,7 @@ inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch)
}
// Should only be called for characters that have one canonically matching value.
-inline UChar getCanonicalPair(UCS2CanonicalizationRange* info, UChar ch)
+inline UChar getCanonicalPair(const UCS2CanonicalizationRange* info, UChar ch)
{
ASSERT(ch >= info->begin && ch <= info->end);
switch (info->type) {
@@ -108,12 +108,12 @@ inline bool isCanonicallyUnique(UChar ch)
// Returns true if values are equal, under the canonicalization rules.
inline bool areCanonicallyEquivalent(UChar a, UChar b)
{
- UCS2CanonicalizationRange* info = rangeInfoFor(a);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(a);
switch (info->type) {
case CanonicalizeUnique:
return a == b;
case CanonicalizeSet: {
- for (uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) {
+ for (const uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) {
if (a == b)
return true;
}
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
index f0312ea25..99b731588 100644
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp
@@ -34,10 +34,6 @@
#include <wtf/text/CString.h>
#include <wtf/text/WTFString.h>
-#ifndef NDEBUG
-#include <stdio.h>
-#endif
-
using namespace WTF;
namespace JSC { namespace Yarr {
@@ -158,7 +154,7 @@ public:
ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
{
- size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+ size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t);
allocatorPool = allocatorPool->ensureCapacity(size);
RELEASE_ASSERT(allocatorPool);
return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
@@ -711,6 +707,7 @@ public:
return true;
case QuantifierNonGreedy:
ASSERT(backTrack->begin != notFound);
+ FALLTHROUGH;
case QuantifierFixedCount:
break;
}
@@ -731,6 +728,7 @@ public:
context->term -= term.atom.parenthesesWidth;
return false;
}
+ FALLTHROUGH;
case QuantifierNonGreedy:
if (backTrack->begin == notFound) {
backTrack->begin = input.getPos();
@@ -746,6 +744,7 @@ public:
context->term -= term.atom.parenthesesWidth;
return true;
}
+ FALLTHROUGH;
case QuantifierFixedCount:
break;
}
@@ -1473,13 +1472,13 @@ public:
m_currentAlternativeIndex = 0;
}
- PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
+ std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator)
{
regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
emitDisjunction(m_pattern.m_body);
regexEnd();
- return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
+ return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator);
}
void checkInput(unsigned count)
@@ -1510,8 +1509,11 @@ public:
void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
{
if (m_pattern.m_ignoreCase) {
- UChar lo = Unicode::toLower(ch);
- UChar hi = Unicode::toUpper(ch);
+ ASSERT(u_tolower(ch) <= 0xFFFF);
+ ASSERT(u_toupper(ch) <= 0xFFFF);
+
+ UChar lo = u_tolower(ch);
+ UChar hi = u_toupper(ch);
if (lo != hi) {
m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
@@ -1710,7 +1712,7 @@ public:
unsigned subpatternId = parenthesesBegin.atom.subpatternId;
unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
- OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+ auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
unsigned firstTermInParentheses = beginTerm + 1;
parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
@@ -1723,7 +1725,7 @@ public:
m_bodyDisjunction->terms.shrink(beginTerm);
m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
- m_allParenthesesInfo.append(parenthesesDisjunction.release());
+ m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
@@ -1776,7 +1778,7 @@ public:
void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
{
- m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
+ m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
m_bodyDisjunction->terms[0].frameLocation = 0;
m_currentAlternativeIndex = 0;
@@ -1918,13 +1920,13 @@ public:
private:
YarrPattern& m_pattern;
- OwnPtr<ByteDisjunction> m_bodyDisjunction;
+ std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
unsigned m_currentAlternativeIndex;
Vector<ParenthesesStackEntry> m_parenthesesStack;
- Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
+ Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
};
-PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
+std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
{
return ByteCompiler(pattern).compile(allocator);
}
diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.h b/Source/JavaScriptCore/yarr/YarrInterpreter.h
index bb64e6d91..dc2f3f77b 100644
--- a/Source/JavaScriptCore/yarr/YarrInterpreter.h
+++ b/Source/JavaScriptCore/yarr/YarrInterpreter.h
@@ -27,8 +27,6 @@
#define YarrInterpreter_h
#include "YarrPattern.h"
-#include <wtf/PassOwnPtr.h>
-#include <wtf/unicode/Unicode.h>
namespace WTF {
class BumpPointerAllocator;
@@ -329,6 +327,8 @@ public:
{
}
+ size_t estimatedSizeInBytes() const { return terms.capacity() * sizeof(ByteTerm); }
+
Vector<ByteTerm> terms;
unsigned m_numSubpatterns;
unsigned m_frameSize;
@@ -337,8 +337,8 @@ public:
struct BytecodePattern {
WTF_MAKE_FAST_ALLOCATED;
public:
- BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction> >& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
- : m_body(body)
+ BytecodePattern(std::unique_ptr<ByteDisjunction> body, Vector<std::unique_ptr<ByteDisjunction>>& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
+ : m_body(WTFMove(body))
, m_ignoreCase(pattern.m_ignoreCase)
, m_multiline(pattern.m_multiline)
, m_allocator(allocator)
@@ -355,7 +355,9 @@ public:
m_userCharacterClasses.shrinkToFit();
}
- OwnPtr<ByteDisjunction> m_body;
+ size_t estimatedSizeInBytes() const { return m_body->estimatedSizeInBytes(); }
+
+ std::unique_ptr<ByteDisjunction> m_body;
bool m_ignoreCase;
bool m_multiline;
// Each BytecodePattern is associated with a RegExp, each RegExp is associated
@@ -366,11 +368,11 @@ public:
CharacterClass* wordcharCharacterClass;
private:
- Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
- Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
+ Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
+ Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
};
-JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
+JS_EXPORT_PRIVATE std::unique_ptr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
diff --git a/Source/JavaScriptCore/yarr/YarrJIT.cpp b/Source/JavaScriptCore/yarr/YarrJIT.cpp
index d337cf797..89e2888a0 100644
--- a/Source/JavaScriptCore/yarr/YarrJIT.cpp
+++ b/Source/JavaScriptCore/yarr/YarrJIT.cpp
@@ -53,6 +53,17 @@ class YarrGenerator : private MacroAssembler {
static const RegisterID returnRegister = ARMRegisters::r0;
static const RegisterID returnRegister2 = ARMRegisters::r1;
+#elif CPU(ARM64)
+ static const RegisterID input = ARM64Registers::x0;
+ static const RegisterID index = ARM64Registers::x1;
+ static const RegisterID length = ARM64Registers::x2;
+ static const RegisterID output = ARM64Registers::x3;
+
+ static const RegisterID regT0 = ARM64Registers::x4;
+ static const RegisterID regT1 = ARM64Registers::x5;
+
+ static const RegisterID returnRegister = ARM64Registers::x0;
+ static const RegisterID returnRegister2 = ARM64Registers::x1;
#elif CPU(MIPS)
static const RegisterID input = MIPSRegisters::a0;
static const RegisterID index = MIPSRegisters::a1;
@@ -322,17 +333,27 @@ class YarrGenerator : private MacroAssembler {
jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
}
+ unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
+ {
+ callFrameSize *= sizeof(void*);
+ if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
+ CRASH();
+ callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
+ if (!callFrameSize)
+ CRASH();
+ return callFrameSize;
+ }
void initCallFrame()
{
unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
if (callFrameSize)
- subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
+ subPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister);
}
void removeCallFrame()
{
unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
if (callFrameSize)
- addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
+ addPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister);
}
// Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
@@ -1605,8 +1626,7 @@ class YarrGenerator : private MacroAssembler {
if (term->quantityType == QuantifierFixedCount)
inputOffset -= term->parentheses.disjunction->m_minimumSize;
if (inputOffset) {
- move(index, indexTemporary);
- add32(Imm32(inputOffset), indexTemporary);
+ add32(Imm32(inputOffset), index, indexTemporary);
setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
} else
setSubpatternStart(index, term->parentheses.subpatternId);
@@ -1618,16 +1638,14 @@ class YarrGenerator : private MacroAssembler {
const RegisterID indexTemporary = regT0;
ASSERT(term->quantityCount == 1);
-#ifndef NDEBUG
// Runtime ASSERT to make sure that the nested alternative handled the
// "no input consumed" check.
- if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
+ if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
Jump pastBreakpoint;
pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
- breakpoint();
+ abortWithReason(YARRNoInputConsumed);
pastBreakpoint.link(this);
}
-#endif
// If the parenthese are capturing, store the ending index value to the
// captures array, offsetting as necessary.
@@ -1638,8 +1656,7 @@ class YarrGenerator : private MacroAssembler {
if (term->capture() && compileMode == IncludeSubpatterns) {
int inputOffset = term->inputPosition - m_checked;
if (inputOffset) {
- move(index, indexTemporary);
- add32(Imm32(inputOffset), indexTemporary);
+ add32(Imm32(inputOffset), index, indexTemporary);
setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
} else
setSubpatternEnd(index, term->parentheses.subpatternId);
@@ -1674,16 +1691,16 @@ class YarrGenerator : private MacroAssembler {
}
case OpParenthesesSubpatternTerminalEnd: {
YarrOp& beginOp = m_ops[op.m_previousOp];
-#ifndef NDEBUG
- PatternTerm* term = op.m_term;
-
- // Runtime ASSERT to make sure that the nested alternative handled the
- // "no input consumed" check.
- Jump pastBreakpoint;
- pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
- breakpoint();
- pastBreakpoint.link(this);
-#endif
+ if (!ASSERT_DISABLED) {
+ PatternTerm* term = op.m_term;
+
+ // Runtime ASSERT to make sure that the nested alternative handled the
+ // "no input consumed" check.
+ Jump pastBreakpoint;
+ pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ abortWithReason(YARRNoInputConsumed);
+ pastBreakpoint.link(this);
+ }
// We know that the match is non-zero, we can accept it and
// loop back up to the head of the subpattern.
@@ -2325,7 +2342,7 @@ class YarrGenerator : private MacroAssembler {
m_ops.append(alternativeBeginOpCode);
m_ops.last().m_previousOp = notFound;
m_ops.last().m_term = term;
- Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives;
+ Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
for (unsigned i = 0; i < alternatives.size(); ++i) {
size_t lastOpIndex = m_ops.size() - 1;
@@ -2376,7 +2393,7 @@ class YarrGenerator : private MacroAssembler {
m_ops.append(OpSimpleNestedAlternativeBegin);
m_ops.last().m_previousOp = notFound;
m_ops.last().m_term = term;
- Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives;
+ Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
for (unsigned i = 0; i < alternatives.size(); ++i) {
size_t lastOpIndex = m_ops.size() - 1;
@@ -2450,7 +2467,7 @@ class YarrGenerator : private MacroAssembler {
// to return the failing result.
void opCompileBody(PatternDisjunction* disjunction)
{
- Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives;
+ Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
size_t currentAlternativeIndex = 0;
// Emit the 'once through' alternatives.
@@ -2549,6 +2566,10 @@ class YarrGenerator : private MacroAssembler {
if (compileMode == IncludeSubpatterns)
loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
#endif
+#elif CPU(ARM64)
+ // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
+ zeroExtend32ToPtr(index, index);
+ zeroExtend32ToPtr(length, length);
#elif CPU(ARM)
push(ARMRegisters::r4);
push(ARMRegisters::r5);
@@ -2559,10 +2580,14 @@ class YarrGenerator : private MacroAssembler {
#elif CPU(MIPS)
// Do nothing.
#endif
+
+ store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT);
}
void generateReturn()
{
+ store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT);
+
#if CPU(X86_64)
#if OS(WINDOWS)
// Store the return value in the allocated space pointed by rcx.
@@ -2591,8 +2616,9 @@ class YarrGenerator : private MacroAssembler {
}
public:
- YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
- : m_pattern(pattern)
+ YarrGenerator(VM* vm, YarrPattern& pattern, YarrCharSize charSize)
+ : m_vm(vm)
+ , m_pattern(pattern)
, m_charSize(charSize)
, m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
, m_shouldFallBack(false)
@@ -2620,11 +2646,8 @@ public:
initCallFrame();
- // Compile the pattern to the internal 'YarrOp' representation.
opCompileBody(m_pattern.m_body);
- // If we encountered anything we can't handle in the JIT code
- // (e.g. backreferences) then return early.
if (m_shouldFallBack) {
jitObject.setFallBack(true);
return;
@@ -2633,8 +2656,12 @@ public:
generate();
backtrack();
- // Link & finalize the code.
- LinkBuffer linkBuffer(*vm, this, REGEXP_CODE_ID);
+ LinkBuffer linkBuffer(*vm, *this, REGEXP_CODE_ID, JITCompilationCanFail);
+ if (linkBuffer.didFailToAllocate()) {
+ jitObject.setFallBack(true);
+ return;
+ }
+
m_backtrackingState.linkDataLabels(linkBuffer);
if (compileMode == MatchOnly) {
@@ -2652,6 +2679,8 @@ public:
}
private:
+ VM* m_vm;
+
YarrPattern& m_pattern;
YarrCharSize m_charSize;
@@ -2684,9 +2713,9 @@ private:
void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
{
if (mode == MatchOnly)
- YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject);
+ YarrGenerator<MatchOnly>(vm, pattern, charSize).compile(vm, jitObject);
else
- YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject);
+ YarrGenerator<IncludeSubpatterns>(vm, pattern, charSize).compile(vm, jitObject);
}
}}
diff --git a/Source/JavaScriptCore/yarr/YarrJIT.h b/Source/JavaScriptCore/yarr/YarrJIT.h
index b7ce7d38f..4d867607f 100644
--- a/Source/JavaScriptCore/yarr/YarrJIT.h
+++ b/Source/JavaScriptCore/yarr/YarrJIT.h
@@ -48,7 +48,7 @@ class ExecutablePool;
namespace Yarr {
class YarrCodeBlock {
-#if CPU(X86_64)
+#if CPU(X86_64) || CPU(ARM64)
typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length) YARR_CALL;
@@ -108,9 +108,44 @@ public:
}
#if ENABLE(REGEXP_TRACING)
- void *getAddr() { return m_ref.code().executableAddress(); }
+ void *get8BitMatchOnlyAddr()
+ {
+ if (!has8BitCodeMatchOnly())
+ return 0;
+
+ return m_matchOnly8.code().executableAddress();
+ }
+
+ void *get16BitMatchOnlyAddr()
+ {
+ if (!has16BitCodeMatchOnly())
+ return 0;
+
+ return m_matchOnly16.code().executableAddress();
+ }
+
+ void *get8BitMatchAddr()
+ {
+ if (!has8BitCode())
+ return 0;
+
+ return m_ref8.code().executableAddress();
+ }
+
+ void *get16BitMatchAddr()
+ {
+ if (!has16BitCode())
+ return 0;
+
+ return m_ref16.code().executableAddress();
+ }
#endif
+ size_t size() const
+ {
+ return m_ref8.size() + m_ref16.size() + m_matchOnly8.size() + m_matchOnly16.size();
+ }
+
void clear()
{
m_ref8 = MacroAssemblerCodeRef();
diff --git a/Source/JavaScriptCore/yarr/YarrParser.h b/Source/JavaScriptCore/yarr/YarrParser.h
index 8c5d71b5f..761acb557 100644
--- a/Source/JavaScriptCore/yarr/YarrParser.h
+++ b/Source/JavaScriptCore/yarr/YarrParser.h
@@ -29,7 +29,6 @@
#include "Yarr.h"
#include <wtf/ASCIICType.h>
#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
namespace JSC { namespace Yarr {
@@ -119,6 +118,7 @@ private:
return;
}
// Otherwise just fall through - cached character so treat this as Empty.
+ FALLTHROUGH;
case Empty:
m_character = ch;
@@ -168,7 +168,7 @@ private:
case CachedCharacter:
// Flush the currently cached character, then fall through.
m_delegate.atomCharacterClassAtom(m_character);
-
+ FALLTHROUGH;
case Empty:
case AfterCharacterClass:
m_state = AfterCharacterClass;
@@ -186,7 +186,7 @@ private:
case CachedCharacterHyphen:
m_delegate.atomCharacterClassAtom(m_character);
m_delegate.atomCharacterClassAtom('-');
- // fall through
+ FALLTHROUGH;
case AfterCharacterClassHyphen:
m_delegate.atomCharacterClassBuiltIn(classID, invert);
m_state = Empty;
@@ -232,7 +232,7 @@ private:
: m_delegate(delegate)
, m_backReferenceLimit(backReferenceLimit)
, m_err(NoError)
- , m_data(pattern.getCharacters<CharType>())
+ , m_data(pattern.characters<CharType>())
, m_size(pattern.length())
, m_index(0)
, m_parenthesesNestingDepth(0)
@@ -349,6 +349,7 @@ private:
}
// Fall-through to handle this as an octal escape.
+ FALLTHROUGH;
}
// Octal escape
@@ -656,7 +657,9 @@ private:
}
restoreState(state);
- } // if we did not find a complete quantifer, fall through to the default case.
+ }
+ // if we did not find a complete quantifer, fall through to the default case.
+ FALLTHROUGH;
default:
m_delegate.atomPatternCharacter(consume());
diff --git a/Source/JavaScriptCore/yarr/YarrPattern.cpp b/Source/JavaScriptCore/yarr/YarrPattern.cpp
index 3ce0216e5..00339b755 100644
--- a/Source/JavaScriptCore/yarr/YarrPattern.cpp
+++ b/Source/JavaScriptCore/yarr/YarrPattern.cpp
@@ -84,21 +84,21 @@ public:
}
// Add multiple matches, if necessary.
- UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
if (info->type == CanonicalizeUnique)
addSorted(m_matchesUnicode, ch);
else
putUnicodeIgnoreCase(ch, info);
}
- void putUnicodeIgnoreCase(UChar ch, UCS2CanonicalizationRange* info)
+ void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)
{
ASSERT(m_isCaseInsensitive);
ASSERT(ch > 0x7f);
ASSERT(ch >= info->begin && ch <= info->end);
ASSERT(info->type != CanonicalizeUnique);
if (info->type == CanonicalizeSet) {
- for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+ for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
addSorted(m_matchesUnicode, ch);
} else {
addSorted(m_matchesUnicode, ch);
@@ -129,7 +129,7 @@ public:
if (!m_isCaseInsensitive)
return;
- UCS2CanonicalizationRange* info = rangeInfoFor(lo);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(lo);
while (true) {
// Handle the range [lo .. end]
UChar end = std::min<UChar>(info->end, hi);
@@ -140,7 +140,7 @@ public:
break;
case CanonicalizeSet: {
UChar ch;
- for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+ for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
addSorted(m_matchesUnicode, ch);
break;
}
@@ -175,16 +175,16 @@ public:
}
- PassOwnPtr<CharacterClass> charClass()
+ std::unique_ptr<CharacterClass> charClass()
{
- OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass);
+ auto characterClass = std::make_unique<CharacterClass>();
characterClass->m_matches.swap(m_matches);
characterClass->m_ranges.swap(m_ranges);
characterClass->m_matchesUnicode.swap(m_matchesUnicode);
characterClass->m_rangesUnicode.swap(m_rangesUnicode);
- return characterClass.release();
+ return characterClass;
}
private:
@@ -274,10 +274,10 @@ public:
, m_characterClassConstructor(pattern.m_ignoreCase)
, m_invertParentheticalAssertion(false)
{
- OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+ auto body = std::make_unique<PatternDisjunction>();
m_pattern.m_body = body.get();
m_alternative = body->addNewAlternative();
- m_pattern.m_disjunctions.append(body.release());
+ m_pattern.m_disjunctions.append(WTFMove(body));
}
~YarrPatternConstructor()
@@ -289,15 +289,15 @@ public:
m_pattern.reset();
m_characterClassConstructor.reset();
- OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+ auto body = std::make_unique<PatternDisjunction>();
m_pattern.m_body = body.get();
m_alternative = body->addNewAlternative();
- m_pattern.m_disjunctions.append(body.release());
+ m_pattern.m_disjunctions.append(WTFMove(body));
}
void assertionBOL()
{
- if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
+ if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) {
m_alternative->m_startsWithBOL = true;
m_alternative->m_containsBOL = true;
m_pattern.m_containsBOL = true;
@@ -322,16 +322,16 @@ public:
return;
}
- UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+ const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
if (info->type == CanonicalizeUnique) {
m_alternative->m_terms.append(PatternTerm(ch));
return;
}
m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
- OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+ auto newCharacterClass = m_characterClassConstructor.charClass();
m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false));
- m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
+ m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass));
}
void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
@@ -391,9 +391,9 @@ public:
void atomCharacterClassEnd()
{
- OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+ auto newCharacterClass = m_characterClassConstructor.charClass();
m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass));
- m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
+ m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass));
}
void atomParenthesesSubpatternBegin(bool capture = true)
@@ -402,19 +402,19 @@ public:
if (capture)
m_pattern.m_numSubpatterns++;
- OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+ auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative);
m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false));
m_alternative = parenthesesDisjunction->addNewAlternative();
- m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
+ m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction));
}
void atomParentheticalAssertionBegin(bool invert = false)
{
- OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+ auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative);
m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert));
m_alternative = parenthesesDisjunction->addNewAlternative();
m_invertParentheticalAssertion = invert;
- m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
+ m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction));
}
void atomParenthesesEnd()
@@ -479,12 +479,12 @@ public:
// skip alternatives with m_startsWithBOL set true.
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
{
- OwnPtr<PatternDisjunction> newDisjunction;
+ std::unique_ptr<PatternDisjunction> newDisjunction;
for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
if (!newDisjunction) {
- newDisjunction = adoptPtr(new PatternDisjunction());
+ newDisjunction = std::make_unique<PatternDisjunction>();
newDisjunction->m_parent = disjunction->m_parent;
}
PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
@@ -498,7 +498,7 @@ public:
return 0;
PatternDisjunction* copiedDisjunction = newDisjunction.get();
- m_pattern.m_disjunctions.append(newDisjunction.release());
+ m_pattern.m_disjunctions.append(WTFMove(newDisjunction));
return copiedDisjunction;
}
@@ -666,6 +666,8 @@ public:
minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
hasFixedSize &= alternative->m_hasFixedSize;
+ if (alternative->m_minimumSize > INT_MAX)
+ m_pattern.m_containsUnsignedLengthPattern = true;
}
ASSERT(minimumInputSize != UINT_MAX);
@@ -696,7 +698,7 @@ public:
if (m_pattern.m_numSubpatterns)
return;
- Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives;
+ Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
for (size_t i = 0; i < alternatives.size(); ++i) {
Vector<PatternTerm>& terms = alternatives[i]->m_terms;
if (terms.size()) {
@@ -737,11 +739,12 @@ public:
}
}
- bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
+ bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t endIndex)
{
Vector<PatternTerm>& terms = alternative->m_terms;
- for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
+ ASSERT(endIndex <= terms.size());
+ for (size_t termIndex = firstTermIndex; termIndex < endIndex; ++termIndex) {
PatternTerm& term = terms[termIndex];
if (term.m_capture)
@@ -750,7 +753,7 @@ public:
if (term.type == PatternTerm::TypeParenthesesSubpattern) {
PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
- if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
+ if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size()))
return true;
}
}
@@ -766,7 +769,7 @@ public:
// beginning and the end of the match.
void optimizeDotStarWrappedExpressions()
{
- Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives;
+ Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
if (alternatives.size() != 1)
return;
@@ -775,7 +778,7 @@ public:
if (terms.size() >= 3) {
bool startsWithBOL = false;
bool endsWithEOL = false;
- size_t termIndex, firstExpressionTerm, lastExpressionTerm;
+ size_t termIndex, firstExpressionTerm;
termIndex = 0;
if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
@@ -798,14 +801,13 @@ public:
PatternTerm& lastNonAnchorTerm = terms[termIndex];
if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
return;
-
- lastExpressionTerm = termIndex - 1;
- if (firstExpressionTerm > lastExpressionTerm)
+ size_t endIndex = termIndex;
+ if (firstExpressionTerm >= endIndex)
return;
- if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
- for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
+ if (!containsCapturingTerms(alternative, firstExpressionTerm, endIndex)) {
+ for (termIndex = terms.size() - 1; termIndex >= endIndex; --termIndex)
terms.remove(termIndex);
for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
@@ -864,6 +866,7 @@ YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline,
, m_multiline(multiline)
, m_containsBackreferences(false)
, m_containsBOL(false)
+ , m_containsUnsignedLengthPattern(false)
, m_numSubpatterns(0)
, m_maxBackReference(0)
, newlineCached(0)
diff --git a/Source/JavaScriptCore/yarr/YarrPattern.h b/Source/JavaScriptCore/yarr/YarrPattern.h
index e7d187c2b..5482de5af 100644
--- a/Source/JavaScriptCore/yarr/YarrPattern.h
+++ b/Source/JavaScriptCore/yarr/YarrPattern.h
@@ -28,12 +28,9 @@
#define YarrPattern_h
#include <wtf/CheckedArithmetic.h>
-#include <wtf/OwnPtr.h>
-#include <wtf/PassOwnPtr.h>
#include <wtf/RefCounted.h>
#include <wtf/Vector.h>
#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
namespace JSC { namespace Yarr {
@@ -270,12 +267,11 @@ public:
PatternAlternative* addNewAlternative()
{
- PatternAlternative* alternative = new PatternAlternative(this);
- m_alternatives.append(adoptPtr(alternative));
- return alternative;
+ m_alternatives.append(std::make_unique<PatternAlternative>(this));
+ return static_cast<PatternAlternative*>(m_alternatives.last().get());
}
- Vector<OwnPtr<PatternAlternative> > m_alternatives;
+ Vector<std::unique_ptr<PatternAlternative>> m_alternatives;
PatternAlternative* m_parent;
unsigned m_minimumSize;
unsigned m_callFrameSize;
@@ -286,13 +282,13 @@ public:
// (please to be calling newlineCharacterClass() et al on your
// friendly neighborhood YarrPattern instance to get nicely
// cached copies).
-CharacterClass* newlineCreate();
-CharacterClass* digitsCreate();
-CharacterClass* spacesCreate();
-CharacterClass* wordcharCreate();
-CharacterClass* nondigitsCreate();
-CharacterClass* nonspacesCreate();
-CharacterClass* nonwordcharCreate();
+std::unique_ptr<CharacterClass> newlineCreate();
+std::unique_ptr<CharacterClass> digitsCreate();
+std::unique_ptr<CharacterClass> spacesCreate();
+std::unique_ptr<CharacterClass> wordcharCreate();
+std::unique_ptr<CharacterClass> nondigitsCreate();
+std::unique_ptr<CharacterClass> nonspacesCreate();
+std::unique_ptr<CharacterClass> nonwordcharCreate();
struct TermChain {
TermChain(PatternTerm term)
@@ -313,6 +309,7 @@ struct YarrPattern {
m_containsBackreferences = false;
m_containsBOL = false;
+ m_containsUnsignedLengthPattern = false;
newlineCached = 0;
digitsCached = 0;
@@ -331,46 +328,65 @@ struct YarrPattern {
return m_maxBackReference > m_numSubpatterns;
}
+ bool containsUnsignedLengthPattern()
+ {
+ return m_containsUnsignedLengthPattern;
+ }
+
CharacterClass* newlineCharacterClass()
{
- if (!newlineCached)
- m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate()));
+ if (!newlineCached) {
+ m_userCharacterClasses.append(newlineCreate());
+ newlineCached = m_userCharacterClasses.last().get();
+ }
return newlineCached;
}
CharacterClass* digitsCharacterClass()
{
- if (!digitsCached)
- m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate()));
+ if (!digitsCached) {
+ m_userCharacterClasses.append(digitsCreate());
+ digitsCached = m_userCharacterClasses.last().get();
+ }
return digitsCached;
}
CharacterClass* spacesCharacterClass()
{
- if (!spacesCached)
- m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate()));
+ if (!spacesCached) {
+ m_userCharacterClasses.append(spacesCreate());
+ spacesCached = m_userCharacterClasses.last().get();
+ }
return spacesCached;
}
CharacterClass* wordcharCharacterClass()
{
- if (!wordcharCached)
- m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate()));
+ if (!wordcharCached) {
+ m_userCharacterClasses.append(wordcharCreate());
+ wordcharCached = m_userCharacterClasses.last().get();
+ }
return wordcharCached;
}
CharacterClass* nondigitsCharacterClass()
{
- if (!nondigitsCached)
- m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate()));
+ if (!nondigitsCached) {
+ m_userCharacterClasses.append(nondigitsCreate());
+ nondigitsCached = m_userCharacterClasses.last().get();
+ }
return nondigitsCached;
}
CharacterClass* nonspacesCharacterClass()
{
- if (!nonspacesCached)
- m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate()));
+ if (!nonspacesCached) {
+ m_userCharacterClasses.append(nonspacesCreate());
+ nonspacesCached = m_userCharacterClasses.last().get();
+ }
return nonspacesCached;
}
CharacterClass* nonwordcharCharacterClass()
{
- if (!nonwordcharCached)
- m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate()));
+ if (!nonwordcharCached) {
+ m_userCharacterClasses.append(nonwordcharCreate());
+ nonwordcharCached = m_userCharacterClasses.last().get();
+ }
return nonwordcharCached;
}
@@ -378,11 +394,12 @@ struct YarrPattern {
bool m_multiline : 1;
bool m_containsBackreferences : 1;
bool m_containsBOL : 1;
+ bool m_containsUnsignedLengthPattern : 1;
unsigned m_numSubpatterns;
unsigned m_maxBackReference;
PatternDisjunction* m_body;
- Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions;
- Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
+ Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
+ Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
private:
const char* compile(const String& patternString);
diff --git a/Source/JavaScriptCore/yarr/yarr.pri b/Source/JavaScriptCore/yarr/yarr.pri
deleted file mode 100644
index 7e9b4d3f3..000000000
--- a/Source/JavaScriptCore/yarr/yarr.pri
+++ /dev/null
@@ -1,12 +0,0 @@
-# -------------------------------------------------------------------
-# Project file for YARR
-#
-# See 'Tools/qmake/README' for an overview of the build system
-# -------------------------------------------------------------------
-
-SOURCES += \
- $$PWD/YarrInterpreter.cpp \
- $$PWD/YarrPattern.cpp \
- $$PWD/YarrSyntaxChecker.cpp \
- $$PWD/YarrCanonicalizeUCS2.cpp
-