summary | refs | log | tree | commit | diff
path: root/deps/v8/src/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r-- deps/v8/src/regexp/experimental/experimental-compiler.cc | 3
-rw-r--r-- deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc | 8
-rw-r--r-- deps/v8/src/regexp/regexp-ast.h | 26
-rw-r--r-- deps/v8/src/regexp/regexp-compiler-tonode.cc | 117
-rw-r--r-- deps/v8/src/regexp/regexp-compiler.cc | 133
-rw-r--r-- deps/v8/src/regexp/regexp-compiler.h | 11
-rw-r--r-- deps/v8/src/regexp/regexp-nodes.h | 25
-rw-r--r-- deps/v8/src/regexp/regexp-parser.cc | 115
-rw-r--r-- deps/v8/src/regexp/regexp.cc | 7
9 files changed, 171 insertions, 274 deletions
diff --git a/deps/v8/src/regexp/experimental/experimental-compiler.cc b/deps/v8/src/regexp/experimental/experimental-compiler.cc
index 277b8df570..8b1d841536 100644
--- a/deps/v8/src/regexp/experimental/experimental-compiler.cc
+++ b/deps/v8/src/regexp/experimental/experimental-compiler.cc
@@ -64,17 +64,14 @@ class CanBeHandledVisitor final : private RegExpVisitor {
}
void* VisitCharacterClass(RegExpCharacterClass* node, void*) override {
- result_ = result_ && AreSuitableFlags(node->flags());
return nullptr;
}
void* VisitAssertion(RegExpAssertion* node, void*) override {
- result_ = result_ && AreSuitableFlags(node->flags());
return nullptr;
}
void* VisitAtom(RegExpAtom* node, void*) override {
- result_ = result_ && AreSuitableFlags(node->flags());
return nullptr;
}
diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index 64e7b869b0..bb82c270b7 100644
--- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -747,7 +747,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
__ LoadU64(r4, MemOperand(frame_pointer(), kStartIndex));
__ subi(r3, current_input_offset(), Operand(char_size()));
if (mode_ == UC16) {
- __ ShiftLeftImm(r0, r4, Operand(1));
+ __ ShiftLeftU64(r0, r4, Operand(1));
__ sub(r3, r3, r0);
} else {
__ sub(r3, r3, r4);
@@ -810,7 +810,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
__ sub(r4, end_of_input_address(), r4);
// r4 is length of input in bytes.
if (mode_ == UC16) {
- __ ShiftRightImm(r4, r4, Operand(1));
+ __ ShiftRightU64(r4, r4, Operand(1));
}
// r4 is length of input in characters.
__ add(r4, r4, r5);
@@ -828,9 +828,9 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
__ mr(r25, r5);
}
if (mode_ == UC16) {
- __ ShiftRightArithImm(r5, r5, 1);
+ __ ShiftRightS64(r5, r5, Operand(1));
__ add(r5, r4, r5);
- __ ShiftRightArithImm(r6, r6, 1);
+ __ ShiftRightS64(r6, r6, Operand(1));
__ add(r6, r4, r6);
} else {
__ add(r5, r4, r5);
diff --git a/deps/v8/src/regexp/regexp-ast.h b/deps/v8/src/regexp/regexp-ast.h
index c2ce0c4c0f..2b9f767c24 100644
--- a/deps/v8/src/regexp/regexp-ast.h
+++ b/deps/v8/src/regexp/regexp-ast.h
@@ -280,8 +280,7 @@ class RegExpAssertion final : public RegExpTree {
NON_BOUNDARY = 5,
LAST_TYPE = NON_BOUNDARY,
};
- RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
- : assertion_type_(type), flags_(flags) {}
+ explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAssertion* AsAssertion() override;
@@ -291,11 +290,9 @@ class RegExpAssertion final : public RegExpTree {
int min_match() override { return 0; }
int max_match() override { return 0; }
AssertionType assertion_type() const { return assertion_type_; }
- JSRegExp::Flags flags() const { return flags_; }
private:
const AssertionType assertion_type_;
- const JSRegExp::Flags flags_;
};
@@ -312,21 +309,17 @@ class RegExpCharacterClass final : public RegExpTree {
using CharacterClassFlags = base::Flags<Flag>;
RegExpCharacterClass(
- Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
+ Zone* zone, ZoneList<CharacterRange>* ranges,
CharacterClassFlags character_class_flags = CharacterClassFlags())
- : set_(ranges),
- flags_(flags),
- character_class_flags_(character_class_flags) {
+ : set_(ranges), character_class_flags_(character_class_flags) {
// Convert the empty set of ranges to the negated Everything() range.
if (ranges->is_empty()) {
ranges->Add(CharacterRange::Everything(), zone);
character_class_flags_ ^= NEGATED;
}
}
- RegExpCharacterClass(base::uc16 type, JSRegExp::Flags flags)
- : set_(type),
- flags_(flags),
- character_class_flags_(CharacterClassFlags()) {}
+ explicit RegExpCharacterClass(base::uc16 type)
+ : set_(type), character_class_flags_(CharacterClassFlags()) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpCharacterClass* AsCharacterClass() override;
@@ -356,23 +349,19 @@ class RegExpCharacterClass final : public RegExpTree {
base::uc16 standard_type() const { return set_.standard_set_type(); }
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
- JSRegExp::Flags flags() const { return flags_; }
bool contains_split_surrogate() const {
return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
}
private:
CharacterSet set_;
- const JSRegExp::Flags flags_;
CharacterClassFlags character_class_flags_;
};
class RegExpAtom final : public RegExpTree {
public:
- explicit RegExpAtom(base::Vector<const base::uc16> data,
- JSRegExp::Flags flags)
- : data_(data), flags_(flags) {}
+ explicit RegExpAtom(base::Vector<const base::uc16> data) : data_(data) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAtom* AsAtom() override;
@@ -383,12 +372,9 @@ class RegExpAtom final : public RegExpTree {
void AppendToText(RegExpText* text, Zone* zone) override;
base::Vector<const base::uc16> data() { return data_; }
int length() { return data_.length(); }
- JSRegExp::Flags flags() const { return flags_; }
- bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
private:
base::Vector<const base::uc16> data_;
- const JSRegExp::Flags flags_;
};
diff --git a/deps/v8/src/regexp/regexp-compiler-tonode.cc b/deps/v8/src/regexp/regexp-compiler-tonode.cc
index c9d4cfc4f9..f668aa6d84 100644
--- a/deps/v8/src/regexp/regexp-compiler-tonode.cc
+++ b/deps/v8/src/regexp/regexp-compiler-tonode.cc
@@ -200,19 +200,17 @@ ZoneList<CharacterRange>* ToCanonicalZoneList(
}
void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
- RegExpNode* on_success, UnicodeRangeSplitter* splitter,
- JSRegExp::Flags flags) {
+ RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* bmp =
ToCanonicalZoneList(splitter->bmp(), compiler->zone());
if (bmp == nullptr) return;
result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
- compiler->zone(), bmp, compiler->read_backward(), on_success, flags)));
+ compiler->zone(), bmp, compiler->read_backward(), on_success)));
}
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
- UnicodeRangeSplitter* splitter,
- JSRegExp::Flags flags) {
+ UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* non_bmp =
ToCanonicalZoneList(splitter->non_bmp(), compiler->zone());
if (non_bmp == nullptr) return;
@@ -237,7 +235,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, to_t), compiler->read_backward(),
- on_success, flags)));
+ on_success)));
} else {
if (from_t != kTrailSurrogateStart) {
// Add [from_l][from_t-\udfff]
@@ -245,7 +243,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, kTrailSurrogateEnd),
- compiler->read_backward(), on_success, flags)));
+ compiler->read_backward(), on_success)));
from_l++;
}
if (to_t != kTrailSurrogateEnd) {
@@ -254,7 +252,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(to_l),
CharacterRange::Range(kTrailSurrogateStart, to_t),
- compiler->read_backward(), on_success, flags)));
+ compiler->read_backward(), on_success)));
to_l--;
}
if (from_l <= to_l) {
@@ -263,7 +261,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Range(from_l, to_l),
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
- compiler->read_backward(), on_success, flags)));
+ compiler->read_backward(), on_success)));
}
}
}
@@ -271,39 +269,38 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
- ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
- JSRegExp::Flags flags) {
+ ZoneList<CharacterRange>* match, RegExpNode* on_success,
+ bool read_backward) {
Zone* zone = compiler->zone();
RegExpNode* match_node = TextNode::CreateForCharacterRanges(
- zone, match, read_backward, on_success, flags);
+ zone, match, read_backward, on_success);
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpLookaround::Builder lookaround(false, match_node, stack_register,
position_register);
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
+ zone, lookbehind, !read_backward, lookaround.on_match_success());
return lookaround.ForMatch(negative_match);
}
RegExpNode* MatchAndNegativeLookaroundInReadDirection(
RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
- bool read_backward, JSRegExp::Flags flags) {
+ bool read_backward) {
Zone* zone = compiler->zone();
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpLookaround::Builder lookaround(false, on_success, stack_register,
position_register);
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- zone, lookahead, read_backward, lookaround.on_match_success(), flags);
+ zone, lookahead, read_backward, lookaround.on_match_success());
return TextNode::CreateForCharacterRanges(
- zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
+ zone, match, read_backward, lookaround.ForMatch(negative_match));
}
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
- UnicodeRangeSplitter* splitter,
- JSRegExp::Flags flags) {
+ UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* lead_surrogates =
ToCanonicalZoneList(splitter->lead_surrogates(), compiler->zone());
if (lead_surrogates == nullptr) return;
@@ -317,20 +314,19 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
// Reading backward. Assert that reading forward, there is no trail
// surrogate, and then backward match the lead surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, trail_surrogates, lead_surrogates, on_success, true, flags);
+ compiler, trail_surrogates, lead_surrogates, on_success, true);
} else {
// Reading forward. Forward match the lead surrogate and assert that
// no trail surrogate follows.
match = MatchAndNegativeLookaroundInReadDirection(
- compiler, lead_surrogates, trail_surrogates, on_success, false, flags);
+ compiler, lead_surrogates, trail_surrogates, on_success, false);
}
result->AddAlternative(GuardedAlternative(match));
}
void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
- UnicodeRangeSplitter* splitter,
- JSRegExp::Flags flags) {
+ UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* trail_surrogates =
ToCanonicalZoneList(splitter->trail_surrogates(), compiler->zone());
if (trail_surrogates == nullptr) return;
@@ -344,12 +340,12 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
// Reading backward. Backward match the trail surrogate and assert that no
// lead surrogate precedes it.
match = MatchAndNegativeLookaroundInReadDirection(
- compiler, trail_surrogates, lead_surrogates, on_success, true, flags);
+ compiler, trail_surrogates, lead_surrogates, on_success, true);
} else {
// Reading forward. Assert that reading backward, there is no lead
// surrogate, and then forward match the trail surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, lead_surrogates, trail_surrogates, on_success, false, flags);
+ compiler, lead_surrogates, trail_surrogates, on_success, false);
}
result->AddAlternative(GuardedAlternative(match));
}
@@ -365,9 +361,7 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
// the associated trail surrogate.
ZoneList<CharacterRange>* range = CharacterRange::List(
zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
- default_flags);
+ return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
}
void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
@@ -410,10 +404,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
set_.Canonicalize();
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* ranges = this->ranges(zone);
- if (NeedsUnicodeCaseEquivalents(flags_)) {
+ if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
AddUnicodeCaseEquivalents(ranges, zone);
}
- if (IsUnicode(flags_) && !compiler->one_byte() &&
+ if (IsUnicode(compiler->flags()) && !compiler->one_byte() &&
!contains_split_surrogate()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =
@@ -422,9 +416,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
ranges = negated;
}
if (ranges->length() == 0) {
- JSRegExp::Flags default_flags;
RegExpCharacterClass* fail =
- zone->New<RegExpCharacterClass>(zone, ranges, default_flags);
+ zone->New<RegExpCharacterClass>(zone, ranges);
return zone->New<TextNode>(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
@@ -432,10 +425,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
} else {
ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
UnicodeRangeSplitter splitter(ranges);
- AddBmpCharacters(compiler, result, on_success, &splitter, flags_);
- AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter, flags_);
- AddLoneLeadSurrogates(compiler, result, on_success, &splitter, flags_);
- AddLoneTrailSurrogates(compiler, result, on_success, &splitter, flags_);
+ AddBmpCharacters(compiler, result, on_success, &splitter);
+ AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
+ AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
+ AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result;
@@ -510,12 +503,10 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
// i is length or it is the index of an atom.
if (i == length) break;
int first_atom = i;
- JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
i++;
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
- if (alternative->AsAtom()->flags() != flags) break;
i++;
}
// Sort atoms to get ones with common prefixes together.
@@ -527,7 +518,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
DCHECK_LT(first_atom, alternatives->length());
DCHECK_LE(i, alternatives->length());
DCHECK_LE(first_atom, i);
- if (IgnoreCase(flags)) {
+ if (IgnoreCase(compiler->flags())) {
#ifdef V8_INTL_SUPPORT
alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
i - first_atom);
@@ -564,7 +555,6 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
continue;
}
RegExpAtom* const atom = alternative->AsAtom();
- JSRegExp::Flags flags = atom->flags();
#ifdef V8_INTL_SUPPORT
icu::UnicodeString common_prefix(atom->data().at(0));
#else
@@ -577,18 +567,17 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
RegExpAtom* const atom = alternative->AsAtom();
- if (atom->flags() != flags) break;
#ifdef V8_INTL_SUPPORT
icu::UnicodeString new_prefix(atom->data().at(0));
if (new_prefix != common_prefix) {
- if (!IgnoreCase(flags)) break;
+ if (!IgnoreCase(compiler->flags())) break;
if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
break;
}
#else
unibrow::uchar new_prefix = atom->data().at(0);
if (new_prefix != common_prefix) {
- if (!IgnoreCase(flags)) break;
+ if (!IgnoreCase(compiler->flags())) break;
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
compiler->isolate()->regexp_macro_assembler_canonicalize();
new_prefix = Canonical(canonicalize, new_prefix);
@@ -617,8 +606,8 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
}
}
}
- RegExpAtom* prefix = zone->New<RegExpAtom>(
- atom->data().SubVector(0, prefix_length), flags);
+ RegExpAtom* prefix =
+ zone->New<RegExpAtom>(atom->data().SubVector(0, prefix_length));
ZoneList<RegExpTree*>* pair = zone->New<ZoneList<RegExpTree*>>(2, zone);
pair->Add(prefix, zone);
ZoneList<RegExpTree*>* suffixes =
@@ -631,8 +620,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
suffixes->Add(zone->New<RegExpEmpty>(), zone);
} else {
RegExpTree* suffix = zone->New<RegExpAtom>(
- old_atom->data().SubVector(prefix_length, old_atom->length()),
- flags);
+ old_atom->data().SubVector(prefix_length, old_atom->length()));
suffixes->Add(suffix, zone);
}
}
@@ -670,7 +658,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
i++;
continue;
}
- JSRegExp::Flags flags = atom->flags();
+ const JSRegExp::Flags flags = compiler->flags();
DCHECK_IMPLIES(IsUnicode(flags),
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
bool contains_trail_surrogate =
@@ -684,7 +672,6 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (!alternative->IsAtom()) break;
RegExpAtom* const atom = alternative->AsAtom();
if (atom->length() != 1) break;
- if (atom->flags() != flags) break;
DCHECK_IMPLIES(IsUnicode(flags),
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
contains_trail_surrogate |=
@@ -705,8 +692,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (IsUnicode(flags) && contains_trail_surrogate) {
character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
}
- alternatives->at(write_posn++) = zone->New<RegExpCharacterClass>(
- zone, ranges, flags, character_class_flags);
+ alternatives->at(write_posn++) =
+ zone->New<RegExpCharacterClass>(zone, ranges, character_class_flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
@@ -754,7 +741,7 @@ RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpAssertion::AssertionType type,
JSRegExp::Flags flags) {
- DCHECK(NeedsUnicodeCaseEquivalents(flags));
+ CHECK(NeedsUnicodeCaseEquivalents(flags));
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* word_range =
zone->New<ZoneList<CharacterRange>>(2, zone);
@@ -772,13 +759,13 @@ RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
stack_register, position_register);
RegExpNode* backward = TextNode::CreateForCharacterRanges(
- zone, word_range, true, lookbehind.on_match_success(), flags);
+ zone, word_range, true, lookbehind.on_match_success());
// Look to the right.
RegExpLookaround::Builder lookahead(lookahead_for_word,
lookbehind.ForMatch(backward),
stack_register, position_register);
RegExpNode* forward = TextNode::CreateForCharacterRanges(
- zone, word_range, false, lookahead.on_match_success(), flags);
+ zone, word_range, false, lookahead.on_match_success());
result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
}
return result;
@@ -796,14 +783,14 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
case START_OF_INPUT:
return AssertionNode::AtStart(on_success);
case BOUNDARY:
- return NeedsUnicodeCaseEquivalents(flags_)
+ return NeedsUnicodeCaseEquivalents(compiler->flags())
? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
- flags_)
+ compiler->flags())
: AssertionNode::AtBoundary(on_success);
case NON_BOUNDARY:
- return NeedsUnicodeCaseEquivalents(flags_)
- ? BoundaryAssertionAsLookaround(compiler, on_success,
- NON_BOUNDARY, flags_)
+ return NeedsUnicodeCaseEquivalents(compiler->flags())
+ ? BoundaryAssertionAsLookaround(
+ compiler, on_success, NON_BOUNDARY, compiler->flags())
: AssertionNode::AtNonBoundary(on_success);
case END_OF_INPUT:
return AssertionNode::AtEnd(on_success);
@@ -819,9 +806,7 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
ZoneList<CharacterRange>* newline_ranges =
zone->New<ZoneList<CharacterRange>>(3, zone);
CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- RegExpCharacterClass* newline_atom =
- zone->New<RegExpCharacterClass>('n', default_flags);
+ RegExpCharacterClass* newline_atom = zone->New<RegExpCharacterClass>('n');
TextNode* newline_matcher =
zone->New<TextNode>(newline_atom, false,
ActionNode::PositiveSubmatchSuccess(
@@ -975,16 +960,11 @@ class AssertionSequenceRewriter final {
uint32_t seen_assertions = 0;
STATIC_ASSERT(RegExpAssertion::LAST_TYPE < kUInt32Size * kBitsPerByte);
- // Flags must match for folding.
- JSRegExp::Flags flags = terms_->at(from)->AsAssertion()->flags();
- bool saw_mismatched_flags = false;
-
for (int i = from; i < to; i++) {
RegExpAssertion* t = terms_->at(i)->AsAssertion();
- if (t->flags() != flags) saw_mismatched_flags = true;
const uint32_t bit = 1 << t->assertion_type();
- if ((seen_assertions & bit) && !saw_mismatched_flags) {
+ if (seen_assertions & bit) {
// Fold duplicates.
terms_->Set(i, zone_->New<RegExpEmpty>());
}
@@ -1006,8 +986,7 @@ class AssertionSequenceRewriter final {
// negated '*' (everything) range serves the purpose.
ZoneList<CharacterRange>* ranges =
zone_->New<ZoneList<CharacterRange>>(0, zone_);
- RegExpCharacterClass* cc =
- zone_->New<RegExpCharacterClass>(zone_, ranges, JSRegExp::Flags());
+ RegExpCharacterClass* cc = zone_->New<RegExpCharacterClass>(zone_, ranges);
terms_->Set(from, cc);
// Zero out the rest.
diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc
index 04599f6a39..38a3d4447f 100644
--- a/deps/v8/src/regexp/regexp-compiler.cc
+++ b/deps/v8/src/regexp/regexp-compiler.cc
@@ -240,12 +240,13 @@ class RecursionCheck {
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
- bool one_byte)
+ JSRegExp::Flags flags, bool one_byte)
: next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
unicode_lookaround_stack_register_(kNoRegister),
unicode_lookaround_position_register_(kNoRegister),
work_list_(nullptr),
recursion_depth_(0),
+ flags_(flags),
one_byte_(one_byte),
reg_exp_too_big_(false),
limiting_recursion_(false),
@@ -279,6 +280,9 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
if (!node->label()->is_bound()) node->Emit(this, &new_trace);
}
if (reg_exp_too_big_) {
+ if (FLAG_correctness_fuzzer_suppressions) {
+ FATAL("Aborting on excess zone allocation");
+ }
macro_assembler_->AbortedCodeGeneration();
return CompilationResult::RegExpTooBig();
}
@@ -1585,7 +1589,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
base::uc16 c = quarks[i];
- if (elm.atom()->ignore_case()) {
+ if (IgnoreCase(compiler->flags())) {
unibrow::uchar chars[4];
int length = GetCaseIndependentLetters(
isolate, c, compiler->one_byte(), chars, 4);
@@ -1815,16 +1819,16 @@ class IterationDecrementer {
LoopChoiceNode* node_;
};
-RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
VisitMarker marker(info());
- return FilterSuccessor(depth - 1);
+ return FilterSuccessor(depth - 1, flags);
}
-RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
- RegExpNode* next = on_success_->FilterOneByte(depth - 1);
+RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, JSRegExp::Flags flags) {
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags);
if (next == nullptr) return set_replacement(nullptr);
on_success_ = next;
return set_replacement(this);
@@ -1845,7 +1849,7 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
return false;
}
-RegExpNode* TextNode::FilterOneByte(int depth) {
+RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
@@ -1857,7 +1861,7 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
base::Vector<const base::uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
base::uc16 c = quarks[j];
- if (elm.atom()->ignore_case()) {
+ if (IgnoreCase(flags)) {
c = unibrow::Latin1::TryConvertToLatin1(c);
}
if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
@@ -1876,8 +1880,7 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
if (range_count != 0 && ranges->at(0).from() == 0 &&
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (IgnoreCase(cc->flags()) &&
- RangesContainLatin1Equivalents(ranges)) {
+ if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
continue;
}
return set_replacement(nullptr);
@@ -1886,8 +1889,7 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
if (range_count == 0 ||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (IgnoreCase(cc->flags()) &&
- RangesContainLatin1Equivalents(ranges)) {
+ if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
continue;
}
return set_replacement(nullptr);
@@ -1895,26 +1897,27 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
}
}
}
- return FilterSuccessor(depth - 1);
+ return FilterSuccessor(depth - 1, flags);
}
-RegExpNode* LoopChoiceNode::FilterOneByte(int depth) {
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
{
VisitMarker marker(info());
- RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
+ RegExpNode* continue_replacement =
+ continue_node_->FilterOneByte(depth - 1, flags);
// If we can't continue after the loop then there is no sense in doing the
// loop.
if (continue_replacement == nullptr) return set_replacement(nullptr);
}
- return ChoiceNode::FilterOneByte(depth - 1);
+ return ChoiceNode::FilterOneByte(depth - 1, flags);
}
-RegExpNode* ChoiceNode::FilterOneByte(int depth) {
+RegExpNode* ChoiceNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -1934,7 +1937,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth) {
RegExpNode* survivor = nullptr;
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
- RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
+ RegExpNode* replacement =
+ alternative.node()->FilterOneByte(depth - 1, flags);
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
if (replacement != nullptr) {
alternatives_->at(i).set_node(replacement);
@@ -1954,7 +1958,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth) {
zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
for (int i = 0; i < choice_count; i++) {
RegExpNode* replacement =
- alternatives_->at(i).node()->FilterOneByte(depth - 1);
+ alternatives_->at(i).node()->FilterOneByte(depth - 1, flags);
if (replacement != nullptr) {
alternatives_->at(i).set_node(replacement);
new_alternatives->Add(alternatives_->at(i), zone());
@@ -1964,7 +1968,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth) {
return this;
}
-RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
+RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
+ JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -1972,12 +1977,12 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = continue_node();
- RegExpNode* replacement = node->FilterOneByte(depth - 1);
+ RegExpNode* replacement = node->FilterOneByte(depth - 1, flags);
if (replacement == nullptr) return set_replacement(nullptr);
alternatives_->at(kContinueIndex).set_node(replacement);
RegExpNode* neg_node = lookaround_node();
- RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == nullptr) return set_replacement(replacement);
@@ -2316,13 +2321,13 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
TextElement elm = elements()->at(i);
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
- if (SkipPass(pass, elm.atom()->ignore_case())) continue;
+ if (SkipPass(pass, IgnoreCase(compiler->flags()))) continue;
base::Vector<const base::uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
if (first_element_checked && i == 0 && j == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
base::uc16 quark = quarks[j];
- if (elm.atom()->ignore_case()) {
+ if (IgnoreCase(compiler->flags())) {
// Everywhere else we assume that a non-Latin-1 character cannot match
// a Latin-1 character. Avoid the cases where this is assumption is
// invalid by using the Latin1 equivalent instead.
@@ -2391,29 +2396,27 @@ bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags) {
+ RegExpNode* on_success) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(1, zone);
- elms->Add(TextElement::CharClass(
- zone->New<RegExpCharacterClass>(zone, ranges, flags)),
- zone);
+ elms->Add(
+ TextElement::CharClass(zone->New<RegExpCharacterClass>(zone, ranges)),
+ zone);
return zone->New<TextNode>(elms, read_backward, on_success);
}
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags) {
+ RegExpNode* on_success) {
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
elms->Add(TextElement::CharClass(
- zone->New<RegExpCharacterClass>(zone, lead_ranges, flags)),
+ zone->New<RegExpCharacterClass>(zone, lead_ranges)),
zone);
elms->Add(TextElement::CharClass(
- zone->New<RegExpCharacterClass>(zone, trail_ranges, flags)),
+ zone->New<RegExpCharacterClass>(zone, trail_ranges)),
zone);
return zone->New<TextNode>(elms, read_backward, on_success);
}
@@ -2487,26 +2490,23 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
}
-void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
+void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
+ JSRegExp::Flags flags) {
+ if (!IgnoreCase(flags)) return;
+#ifdef V8_INTL_SUPPORT
+ if (NeedsUnicodeCaseEquivalents(flags)) return;
+#endif
+
int element_count = elements()->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
-#ifdef V8_INTL_SUPPORT
- bool case_equivalents_already_added =
- NeedsUnicodeCaseEquivalents(cc->flags());
-#else
- bool case_equivalents_already_added = false;
-#endif
- if (IgnoreCase(cc->flags()) && !case_equivalents_already_added) {
- // None of the standard character classes is different in the case
- // independent case and it slows us down if we don't know that.
- if (cc->is_standard(zone())) continue;
- ZoneList<CharacterRange>* ranges = cc->ranges(zone());
- CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
- is_one_byte);
- }
+ // None of the standard character classes is different in the case
+ // independent case and it slows us down if we don't know that.
+ if (cc->is_standard(zone())) continue;
+ ZoneList<CharacterRange>* ranges = cc->ranges(zone());
+ CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
}
}
}
@@ -3634,9 +3634,10 @@ class EatsAtLeastPropagator : public AllStatic {
template <typename... Propagators>
class Analysis : public NodeVisitor {
public:
- Analysis(Isolate* isolate, bool is_one_byte)
+ Analysis(Isolate* isolate, bool is_one_byte, JSRegExp::Flags flags)
: isolate_(isolate),
is_one_byte_(is_one_byte),
+ flags_(flags),
error_(RegExpError::kNone) {}
void EnsureAnalyzed(RegExpNode* that) {
@@ -3677,7 +3678,7 @@ class Analysis : public NodeVisitor {
} while (false)
void VisitText(TextNode* that) override {
- that->MakeCaseIndependent(isolate(), is_one_byte_);
+ that->MakeCaseIndependent(isolate(), is_one_byte_, flags_);
EnsureAnalyzed(that->on_success());
if (has_failed()) return;
that->CalculateOffsets();
@@ -3744,16 +3745,17 @@ class Analysis : public NodeVisitor {
private:
Isolate* isolate_;
- bool is_one_byte_;
+ const bool is_one_byte_;
+ const JSRegExp::Flags flags_;
RegExpError error_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
- RegExpNode* node) {
- Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
- is_one_byte);
+ JSRegExp::Flags flags, RegExpNode* node) {
+ Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
+ isolate, is_one_byte, flags);
DCHECK_EQ(node->info()->been_analyzed, false);
analysis.EnsureAnalyzed(node);
DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
@@ -3807,7 +3809,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
return;
}
base::uc16 character = atom->data()[j];
- if (IgnoreCase(atom->flags())) {
+ if (IgnoreCase(bm->compiler()->flags())) {
unibrow::uchar chars[4];
int length = GetCaseIndependentLetters(
isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
@@ -3846,7 +3848,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
}
RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
- RegExpNode* on_success, JSRegExp::Flags flags) {
+ RegExpNode* on_success) {
DCHECK(!read_backward());
ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
@@ -3858,11 +3860,11 @@ RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
int stack_register = UnicodeLookaroundStackRegister();
int position_register = UnicodeLookaroundPositionRegister();
RegExpNode* step_back = TextNode::CreateForCharacterRanges(
- zone(), lead_surrogates, true, on_success, flags);
+ zone(), lead_surrogates, true, on_success);
RegExpLookaround::Builder builder(true, step_back, stack_register,
position_register);
RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
- zone(), trail_surrogates, false, builder.on_match_success(), flags);
+ zone(), trail_surrogates, false, builder.on_match_success());
optional_step_back->AddAlternative(
GuardedAlternative(builder.ForMatch(match_trail)));
@@ -3881,11 +3883,9 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags)) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or sticky.
- JSRegExp::Flags default_flags = JSRegExp::Flags();
RegExpNode* loop_node = RegExpQuantifier::ToNode(
- 0, RegExpTree::kInfinity, false,
- zone()->New<RegExpCharacterClass>('*', default_flags), this,
- captured_body, data->contains_anchor);
+ 0, RegExpTree::kInfinity, false, zone()->New<RegExpCharacterClass>('*'),
+ this, captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
@@ -3893,22 +3893,21 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
- zone()->New<RegExpCharacterClass>('*', default_flags), false,
- loop_node)));
+ zone()->New<RegExpCharacterClass>('*'), false, loop_node)));
node = first_step_node;
} else {
node = loop_node;
}
}
if (is_one_byte) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != nullptr) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
}
} else if (IsUnicode(flags) && (IsGlobal(flags) || IsSticky(flags))) {
- node = OptionallyStepBackToLeadSurrogate(node, flags);
+ node = OptionallyStepBackToLeadSurrogate(node);
}
if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());
diff --git a/deps/v8/src/regexp/regexp-compiler.h b/deps/v8/src/regexp/regexp-compiler.h
index 4d53e47094..2be7a48e9a 100644
--- a/deps/v8/src/regexp/regexp-compiler.h
+++ b/deps/v8/src/regexp/regexp-compiler.h
@@ -424,7 +424,8 @@ struct PreloadState {
// Analysis performs assertion propagation and computes eats_at_least_ values.
// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
// details.
-RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
+RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
+ JSRegExp::Flags flags, RegExpNode* node);
class FrequencyCollator {
public:
@@ -474,7 +475,7 @@ class FrequencyCollator {
class RegExpCompiler {
public:
RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
- bool is_one_byte);
+ JSRegExp::Flags flags, bool is_one_byte);
int AllocateRegister() {
if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
@@ -531,8 +532,7 @@ class RegExpCompiler {
// If the regexp matching starts within a surrogate pair, step back to the
// lead surrogate and start matching from there.
- RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpNode* on_success,
- JSRegExp::Flags flags);
+ RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpNode* on_success);
inline void AddWork(RegExpNode* node) {
if (!node->on_work_list() && !node->label()->is_bound()) {
@@ -553,6 +553,8 @@ class RegExpCompiler {
inline void IncrementRecursionDepth() { recursion_depth_++; }
inline void DecrementRecursionDepth() { recursion_depth_--; }
+ JSRegExp::Flags flags() const { return flags_; }
+
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
inline bool one_byte() { return one_byte_; }
@@ -583,6 +585,7 @@ class RegExpCompiler {
int unicode_lookaround_position_register_;
ZoneVector<RegExpNode*>* work_list_;
int recursion_depth_;
+ const JSRegExp::Flags flags_;
RegExpMacroAssembler* macro_assembler_;
bool one_byte_;
bool reg_exp_too_big_;
diff --git a/deps/v8/src/regexp/regexp-nodes.h b/deps/v8/src/regexp/regexp-nodes.h
index 23dbd4cdcb..537cf96201 100644
--- a/deps/v8/src/regexp/regexp-nodes.h
+++ b/deps/v8/src/regexp/regexp-nodes.h
@@ -205,7 +205,9 @@ class RegExpNode : public ZoneObject {
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or nullptr if the node can never match.
- virtual RegExpNode* FilterOneByte(int depth) { return this; }
+ virtual RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) {
+ return this;
+ }
// Helper for FilterOneByte.
RegExpNode* replacement() {
DCHECK(info()->replacement_calculated);
@@ -294,7 +296,7 @@ class SeqRegExpNode : public RegExpNode {
: RegExpNode(on_success->zone()), on_success_(on_success) {}
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
- RegExpNode* FilterOneByte(int depth) override;
+ RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override {
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
@@ -302,7 +304,7 @@ class SeqRegExpNode : public RegExpNode {
}
protected:
- RegExpNode* FilterSuccessor(int depth);
+ RegExpNode* FilterSuccessor(int depth, JSRegExp::Flags flags);
private:
RegExpNode* on_success_;
@@ -406,15 +408,13 @@ class TextNode : public SeqRegExpNode {
static TextNode* CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags);
+ RegExpNode* on_success);
// Create TextNode for a surrogate pair with a range given for the
// lead and the trail surrogate each.
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags);
+ RegExpNode* on_success);
void Accept(NodeVisitor* visitor) override;
void Emit(RegExpCompiler* compiler, Trace* trace) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
@@ -422,14 +422,15 @@ class TextNode : public SeqRegExpNode {
bool not_at_start) override;
ZoneList<TextElement>* elements() { return elms_; }
bool read_backward() { return read_backward_; }
- void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
+ void MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
+ JSRegExp::Flags flags);
int GreedyLoopTextLength() override;
RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override;
void CalculateOffsets();
- RegExpNode* FilterOneByte(int depth) override;
+ RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
int Length();
private:
@@ -622,7 +623,7 @@ class ChoiceNode : public RegExpNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
- RegExpNode* FilterOneByte(int depth) override;
+ RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
virtual bool read_backward() { return false; }
protected:
@@ -694,7 +695,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
return !is_first;
}
void Accept(NodeVisitor* visitor) override;
- RegExpNode* FilterOneByte(int depth) override;
+ RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
};
class LoopChoiceNode : public ChoiceNode {
@@ -727,7 +728,7 @@ class LoopChoiceNode : public ChoiceNode {
int min_loop_iterations() const { return min_loop_iterations_; }
bool read_backward() override { return read_backward_; }
void Accept(NodeVisitor* visitor) override;
- RegExpNode* FilterOneByte(int depth) override;
+ RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
private:
// AddAlternative is made private for loop nodes because alternatives
diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc
index aaa7b9cf8b..1201e555ad 100644
--- a/deps/v8/src/regexp/regexp-parser.cc
+++ b/deps/v8/src/regexp/regexp-parser.cc
@@ -250,14 +250,10 @@ RegExpTree* RegExpParser::ParseDisjunction() {
return ReportError(RegExpError::kNothingToRepeat);
case '^': {
Advance();
- if (builder->multiline()) {
- builder->AddAssertion(zone()->New<RegExpAssertion>(
- RegExpAssertion::START_OF_LINE, builder->flags()));
- } else {
- builder->AddAssertion(zone()->New<RegExpAssertion>(
- RegExpAssertion::START_OF_INPUT, builder->flags()));
- set_contains_anchor();
- }
+ builder->AddAssertion(zone()->New<RegExpAssertion>(
+ builder->multiline() ? RegExpAssertion::START_OF_LINE
+ : RegExpAssertion::START_OF_INPUT));
+ set_contains_anchor();
continue;
}
case '$': {
@@ -265,8 +261,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
RegExpAssertion::AssertionType assertion_type =
builder->multiline() ? RegExpAssertion::END_OF_LINE
: RegExpAssertion::END_OF_INPUT;
- builder->AddAssertion(
- zone()->New<RegExpAssertion>(assertion_type, builder->flags()));
+ builder->AddAssertion(zone()->New<RegExpAssertion>(assertion_type));
continue;
}
case '.': {
@@ -283,7 +278,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
RegExpCharacterClass* cc =
- zone()->New<RegExpCharacterClass>(zone(), ranges, builder->flags());
+ zone()->New<RegExpCharacterClass>(zone(), ranges);
builder->AddCharacterClass(cc);
break;
}
@@ -305,13 +300,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
return ReportError(RegExpError::kEscapeAtEndOfPattern);
case 'b':
Advance(2);
- builder->AddAssertion(zone()->New<RegExpAssertion>(
- RegExpAssertion::BOUNDARY, builder->flags()));
+ builder->AddAssertion(
+ zone()->New<RegExpAssertion>(RegExpAssertion::BOUNDARY));
continue;
case 'B':
Advance(2);
- builder->AddAssertion(zone()->New<RegExpAssertion>(
- RegExpAssertion::NON_BOUNDARY, builder->flags()));
+ builder->AddAssertion(
+ zone()->New<RegExpAssertion>(RegExpAssertion::NON_BOUNDARY));
continue;
// AtomEscape ::
// CharacterClassEscape
@@ -330,8 +325,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
zone()->New<ZoneList<CharacterRange>>(2, zone());
CharacterRange::AddClassEscape(
c, ranges, unicode() && builder->ignore_case(), zone());
- RegExpCharacterClass* cc = zone()->New<RegExpCharacterClass>(
- zone(), ranges, builder->flags());
+ RegExpCharacterClass* cc =
+ zone()->New<RegExpCharacterClass>(zone(), ranges);
builder->AddCharacterClass(cc);
break;
}
@@ -346,8 +341,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
ZoneVector<char> name_2(zone());
if (ParsePropertyClassName(&name_1, &name_2)) {
if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
- RegExpCharacterClass* cc = zone()->New<RegExpCharacterClass>(
- zone(), ranges, builder->flags());
+ RegExpCharacterClass* cc =
+ zone()->New<RegExpCharacterClass>(zone(), ranges);
builder->AddCharacterClass(cc);
break;
}
@@ -605,68 +600,6 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
lookaround_type = RegExpLookaround::LOOKAHEAD;
subexpr_type = NEGATIVE_LOOKAROUND;
break;
- case '-':
- case 'i':
- case 's':
- case 'm': {
- if (!FLAG_regexp_mode_modifiers) {
- ReportError(RegExpError::kInvalidGroup);
- return nullptr;
- }
- Advance();
- bool flags_sense = true; // Switching on flags.
- while (subexpr_type != GROUPING) {
- switch (current()) {
- case '-':
- if (!flags_sense) {
- ReportError(RegExpError::kMultipleFlagDashes);
- return nullptr;
- }
- flags_sense = false;
- Advance();
- continue;
- case 's':
- case 'i':
- case 'm': {
- JSRegExp::Flags bit = JSRegExp::kUnicode;
- if (current() == 'i') bit = JSRegExp::kIgnoreCase;
- if (current() == 'm') bit = JSRegExp::kMultiline;
- if (current() == 's') bit = JSRegExp::kDotAll;
- if (((switch_on | switch_off) & bit) != 0) {
- ReportError(RegExpError::kRepeatedFlag);
- return nullptr;
- }
- if (flags_sense) {
- switch_on |= bit;
- } else {
- switch_off |= bit;
- }
- Advance();
- continue;
- }
- case ')': {
- Advance();
- state->builder()
- ->FlushText(); // Flush pending text using old flags.
- // These (?i)-style flag switches don't put us in a subexpression
- // at all, they just modify the flags in the rest of the current
- // subexpression.
- JSRegExp::Flags flags =
- (state->builder()->flags() | switch_on) & ~switch_off;
- state->builder()->set_flags(flags);
- return state;
- }
- case ':':
- Advance();
- subexpr_type = GROUPING; // Will break us out of the outer loop.
- continue;
- default:
- ReportError(RegExpError::kInvalidFlagGroup);
- return nullptr;
- }
- }
- break;
- }
case '<':
Advance();
if (Next() == '=') {
@@ -1493,7 +1426,7 @@ RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name_1) {
prefix_ranges->Add(CharacterRange::Singleton('#'), zone());
prefix_ranges->Add(CharacterRange::Singleton('*'), zone());
builder.AddCharacterClass(
- zone()->New<RegExpCharacterClass>(zone(), prefix_ranges, flags));
+ zone()->New<RegExpCharacterClass>(zone(), prefix_ranges));
builder.AddCharacter(0xFE0F);
builder.AddCharacter(0x20E3);
return builder.ToRegExp();
@@ -1506,13 +1439,13 @@ RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name_1) {
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER_BASE, "Y", false,
modifier_base_ranges, zone());
builder.AddCharacterClass(
- zone()->New<RegExpCharacterClass>(zone(), modifier_base_ranges, flags));
+ zone()->New<RegExpCharacterClass>(zone(), modifier_base_ranges));
ZoneList<CharacterRange>* modifier_ranges =
zone()->New<ZoneList<CharacterRange>>(2, zone());
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER, "Y", false, modifier_ranges,
zone());
builder.AddCharacterClass(
- zone()->New<RegExpCharacterClass>(zone(), modifier_ranges, flags));
+ zone()->New<RegExpCharacterClass>(zone(), modifier_ranges));
return builder.ToRegExp();
}
@@ -1780,7 +1713,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
Advance();
RegExpCharacterClass::CharacterClassFlags character_class_flags;
if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
- return zone()->New<RegExpCharacterClass>(zone(), ranges, builder->flags(),
+ return zone()->New<RegExpCharacterClass>(zone(), ranges,
character_class_flags);
}
@@ -1874,7 +1807,7 @@ void RegExpBuilder::AddTrailSurrogate(base::uc16 trail_surrogate) {
surrogate_pair.Add(lead_surrogate, zone());
surrogate_pair.Add(trail_surrogate, zone());
RegExpAtom* atom =
- zone()->New<RegExpAtom>(surrogate_pair.ToConstVector(), flags_);
+ zone()->New<RegExpAtom>(surrogate_pair.ToConstVector());
AddAtom(atom);
}
} else {
@@ -1897,8 +1830,7 @@ void RegExpBuilder::FlushCharacters() {
FlushPendingSurrogate();
pending_empty_ = false;
if (characters_ != nullptr) {
- RegExpTree* atom =
- zone()->New<RegExpAtom>(characters_->ToConstVector(), flags_);
+ RegExpTree* atom = zone()->New<RegExpAtom>(characters_->ToConstVector());
characters_ = nullptr;
text_.Add(atom, zone());
LAST(ADD_ATOM);
@@ -1972,8 +1904,7 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
void RegExpBuilder::AddCharacterClassForDesugaring(base::uc32 c) {
AddTerm(zone()->New<RegExpCharacterClass>(
- zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)),
- flags_));
+ zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c))));
}
void RegExpBuilder::AddAtom(RegExpTree* term) {
@@ -2083,11 +2014,11 @@ bool RegExpBuilder::AddQuantifierToAtom(
if (num_chars > 1) {
base::Vector<const base::uc16> prefix =
char_vector.SubVector(0, num_chars - 1);
- text_.Add(zone()->New<RegExpAtom>(prefix, flags_), zone());
+ text_.Add(zone()->New<RegExpAtom>(prefix), zone());
char_vector = char_vector.SubVector(num_chars - 1, num_chars);
}
characters_ = nullptr;
- atom = zone()->New<RegExpAtom>(char_vector, flags_);
+ atom = zone()->New<RegExpAtom>(char_vector);
FlushText();
} else if (text_.length() > 0) {
DCHECK(last_added_ == ADD_ATOM);
diff --git a/deps/v8/src/regexp/regexp.cc b/deps/v8/src/regexp/regexp.cc
index d2e5f1a1f9..9bdebe1918 100644
--- a/deps/v8/src/regexp/regexp.cc
+++ b/deps/v8/src/regexp/regexp.cc
@@ -225,7 +225,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
ASSIGN_RETURN_ON_EXCEPTION(
isolate, atom_string,
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
- if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
+ if (!IgnoreCase(flags) && !HasFewDifferentCharacters(atom_string)) {
RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string);
has_been_compiled = true;
}
@@ -802,7 +802,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
return false;
}
- RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
+ RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
+ is_one_byte);
if (compiler.optimize()) {
compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
@@ -821,7 +822,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
}
data->node = compiler.PreprocessRegExp(data, flags, is_one_byte);
- data->error = AnalyzeRegExp(isolate, is_one_byte, data->node);
+ data->error = AnalyzeRegExp(isolate, is_one_byte, flags, data->node);
if (data->error != RegExpError::kNone) {
return false;
}