summary | refs | log | tree | commit | diff
path: root/deps/v8/src/regexp/jsregexp.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/regexp/jsregexp.cc')
-rw-r--r-- deps/v8/src/regexp/jsregexp.cc 47
1 files changed, 30 insertions, 17 deletions
diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc
index 8ab2681dcf..61cabd0b94 100644
--- a/deps/v8/src/regexp/jsregexp.cc
+++ b/deps/v8/src/regexp/jsregexp.cc
@@ -26,10 +26,10 @@
#include "src/string-search.h"
#include "src/unicode-decoder.h"
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
#include "unicode/uniset.h"
#include "unicode/utypes.h"
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
#ifndef V8_INTERPRETED_REGEXP
#if V8_TARGET_ARCH_IA32
@@ -3327,9 +3327,8 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
RegExpNode* on_success) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
- elms->Add(
- TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),
- zone);
+ elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)),
+ zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -3341,12 +3340,12 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(lead_ranges, false)),
- zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(trail_ranges, false)),
- zone);
+ elms->Add(
+ TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)),
+ zone);
+ elms->Add(
+ TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)),
+ zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -4851,7 +4850,7 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges,
bool RegExpCharacterClass::is_standard(Zone* zone) {
// TODO(lrn): Remove need for this function, by not throwing away information
// along the way.
- if (is_negated_) {
+ if (is_negated()) {
return false;
}
if (set_.is_standard()) {
@@ -5114,7 +5113,7 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
}
void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
// Use ICU to compute the case fold closure over the ranges.
icu::UnicodeSet set;
for (int i = 0; i < ranges->length(); i++) {
@@ -5132,7 +5131,7 @@ void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
}
// No errors and everything we collected have been ranges.
CharacterRange::Canonicalize(ranges);
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
}
@@ -5144,7 +5143,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
if (compiler->needs_unicode_case_equivalents()) {
AddUnicodeCaseEquivalents(ranges, zone);
}
- if (compiler->unicode() && !compiler->one_byte()) {
+ if (compiler->unicode() && !compiler->one_byte() &&
+ !contains_split_surrogate()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =
new (zone) ZoneList<CharacterRange>(2, zone);
@@ -5154,7 +5154,7 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
if (ranges->length() == 0) {
ranges->Add(CharacterRange::Everything(), zone);
RegExpCharacterClass* fail =
- new (zone) RegExpCharacterClass(ranges, true);
+ new (zone) RegExpCharacterClass(ranges, NEGATED);
return new (zone) TextNode(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
@@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
Zone* zone = compiler->zone();
ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length();
+ const bool unicode = compiler->unicode();
int write_posn = 0;
int i = 0;
@@ -5368,6 +5369,10 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
i++;
continue;
}
+ DCHECK_IMPLIES(unicode,
+ !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+ bool contains_trail_surrogate =
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
int first_in_run = i;
i++;
while (i < length) {
@@ -5375,6 +5380,10 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (!alternative->IsAtom()) break;
atom = alternative->AsAtom();
if (atom->length() != 1) break;
+ DCHECK_IMPLIES(unicode,
+ !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+ contains_trail_surrogate |=
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
i++;
}
if (i > first_in_run + 1) {
@@ -5387,8 +5396,12 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
DCHECK_EQ(old_atom->length(), 1);
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
}
+ RegExpCharacterClass::Flags flags;
+ if (unicode && contains_trail_surrogate) {
+ flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
+ }
alternatives->at(write_posn++) =
- new (zone) RegExpCharacterClass(ranges, false);
+ new (zone) RegExpCharacterClass(ranges, flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {