diff options
Diffstat (limited to 'deps/v8/src/regexp/jsregexp.cc')
-rw-r--r-- | deps/v8/src/regexp/jsregexp.cc | 47 |
1 files changed, 30 insertions, 17 deletions
diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc index 8ab2681dcf..61cabd0b94 100644 --- a/deps/v8/src/regexp/jsregexp.cc +++ b/deps/v8/src/regexp/jsregexp.cc @@ -26,10 +26,10 @@ #include "src/string-search.h" #include "src/unicode-decoder.h" -#ifdef V8_I18N_SUPPORT +#ifdef V8_INTL_SUPPORT #include "unicode/uniset.h" #include "unicode/utypes.h" -#endif // V8_I18N_SUPPORT +#endif // V8_INTL_SUPPORT #ifndef V8_INTERPRETED_REGEXP #if V8_TARGET_ARCH_IA32 @@ -3327,9 +3327,8 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone, RegExpNode* on_success) { DCHECK_NOT_NULL(ranges); ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone); - elms->Add( - TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)), - zone); + elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)), + zone); return new (zone) TextNode(elms, read_backward, on_success); } @@ -3341,12 +3340,12 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead); ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone); - elms->Add(TextElement::CharClass( - new (zone) RegExpCharacterClass(lead_ranges, false)), - zone); - elms->Add(TextElement::CharClass( - new (zone) RegExpCharacterClass(trail_ranges, false)), - zone); + elms->Add( + TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)), + zone); + elms->Add( + TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)), + zone); return new (zone) TextNode(elms, read_backward, on_success); } @@ -4851,7 +4850,7 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges, bool RegExpCharacterClass::is_standard(Zone* zone) { // TODO(lrn): Remove need for this function, by not throwing away information // along the way. - if (is_negated_) { + if (is_negated()) { return false; } if (set_.is_standard()) { @@ -5114,7 +5113,7 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler, } void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) { -#ifdef V8_I18N_SUPPORT +#ifdef V8_INTL_SUPPORT // Use ICU to compute the case fold closure over the ranges. icu::UnicodeSet set; for (int i = 0; i < ranges->length(); i++) { @@ -5132,7 +5131,7 @@ void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) { } // No errors and everything we collected have been ranges. CharacterRange::Canonicalize(ranges); -#endif // V8_I18N_SUPPORT +#endif // V8_INTL_SUPPORT } @@ -5144,7 +5143,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, if (compiler->needs_unicode_case_equivalents()) { AddUnicodeCaseEquivalents(ranges, zone); } - if (compiler->unicode() && !compiler->one_byte()) { + if (compiler->unicode() && !compiler->one_byte() && + !contains_split_surrogate()) { if (is_negated()) { ZoneList<CharacterRange>* negated = new (zone) ZoneList<CharacterRange>(2, zone); @@ -5154,7 +5154,7 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, if (ranges->length() == 0) { ranges->Add(CharacterRange::Everything(), zone); RegExpCharacterClass* fail = - new (zone) RegExpCharacterClass(ranges, true); + new (zone) RegExpCharacterClass(ranges, NEGATED); return new (zone) TextNode(fail, compiler->read_backward(), on_success); } if (standard_type() == '*') { @@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( Zone* zone = compiler->zone(); ZoneList<RegExpTree*>* alternatives = this->alternatives(); int length = alternatives->length(); + const bool unicode = compiler->unicode(); int write_posn = 0; int i = 0; @@ -5368,6 +5369,10 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( i++; continue; } + DCHECK_IMPLIES(unicode, + !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); + bool contains_trail_surrogate = + unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); int first_in_run = i; i++; while (i < length) { @@ -5375,6 +5380,10 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( if (!alternative->IsAtom()) break; atom = alternative->AsAtom(); if (atom->length() != 1) break; + DCHECK_IMPLIES(unicode, + !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); + contains_trail_surrogate |= + unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); i++; } if (i > first_in_run + 1) { @@ -5387,8 +5396,12 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( DCHECK_EQ(old_atom->length(), 1); ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); } + RegExpCharacterClass::Flags flags; + if (unicode && contains_trail_surrogate) { + flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE; + } alternatives->at(write_posn++) = - new (zone) RegExpCharacterClass(ranges, false); + new (zone) RegExpCharacterClass(ranges, flags); } else { // Just copy any trivial alternatives. for (int j = first_in_run; j < i; j++) { |