diff options
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r-- | deps/v8/src/regexp/interpreter-irregexp.cc | 1 | ||||
-rw-r--r-- | deps/v8/src/regexp/jsregexp.cc | 31 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc | 3 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-macro-assembler-tracer.cc | 1 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-parser.cc | 56 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-utils.cc | 9 | ||||
-rw-r--r-- | deps/v8/src/regexp/x87/OWNERS | 1 |
7 files changed, 48 insertions, 54 deletions
diff --git a/deps/v8/src/regexp/interpreter-irregexp.cc b/deps/v8/src/regexp/interpreter-irregexp.cc index 14834d512a..4f8f96a536 100644 --- a/deps/v8/src/regexp/interpreter-irregexp.cc +++ b/deps/v8/src/regexp/interpreter-irregexp.cc @@ -9,6 +9,7 @@ #include "src/regexp/interpreter-irregexp.h" #include "src/ast/ast.h" +#include "src/objects-inl.h" #include "src/regexp/bytecodes-irregexp.h" #include "src/regexp/jsregexp.h" #include "src/regexp/regexp-macro-assembler.h" diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc index f0abc9a8b3..8b21459059 100644 --- a/deps/v8/src/regexp/jsregexp.cc +++ b/deps/v8/src/regexp/jsregexp.cc @@ -27,7 +27,7 @@ #include "src/unicode-decoder.h" #ifdef V8_I18N_SUPPORT -#include "unicode/uset.h" +#include "unicode/uniset.h" #include "unicode/utypes.h" #endif // V8_I18N_SUPPORT @@ -451,7 +451,7 @@ void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, Handle<String> subject) { - subject = String::Flatten(subject); + DCHECK(subject->IsFlat()); // Check representation of the underlying storage. bool is_one_byte = subject->IsOneByteRepresentationUnderneath(); @@ -565,6 +565,8 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec( Isolate* isolate = regexp->GetIsolate(); DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); + subject = String::Flatten(subject); + // Prepare space for the return values. #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) if (FLAG_trace_regexp_bytecodes) { @@ -5114,30 +5116,22 @@ void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, // Use ICU to compute the case fold closure over the ranges. DCHECK(compiler->unicode()); DCHECK(compiler->ignore_case()); - USet* set = uset_openEmpty(); + icu::UnicodeSet set; for (int i = 0; i < ranges->length(); i++) { - uset_addRange(set, ranges->at(i).from(), ranges->at(i).to()); + set.add(ranges->at(i).from(), ranges->at(i).to()); } ranges->Clear(); - uset_closeOver(set, USET_CASE_INSENSITIVE); + set.closeOver(USET_CASE_INSENSITIVE); // Full case mapping map single characters to multiple characters. // Those are represented as strings in the set. Remove them so that // we end up with only simple and common case mappings. - uset_removeAllStrings(set); - int item_count = uset_getItemCount(set); - int item_result = 0; - UErrorCode ec = U_ZERO_ERROR; + set.removeAllStrings(); Zone* zone = compiler->zone(); - for (int i = 0; i < item_count; i++) { - uc32 start = 0; - uc32 end = 0; - item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); - ranges->Add(CharacterRange::Range(start, end), zone); + for (int i = 0; i < set.getRangeCount(); i++) { + ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)), + zone); } // No errors and everything we collected have been ranges. - DCHECK_EQ(U_ZERO_ERROR, ec); - DCHECK_EQ(0, item_result); - uset_close(set); #else // Fallback if ICU is not included. CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(), @@ -6742,8 +6736,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( // Inserted here, instead of in Assembler, because it depends on information // in the AST that isn't replicated in the Node structure. static const int kMaxBacksearchLimit = 1024; - if (is_end_anchored && - !is_start_anchored && + if (is_end_anchored && !is_start_anchored && !is_sticky && max_length < kMaxBacksearchLimit) { macro_assembler.SetCurrentPositionFromEnd(max_length); } diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc index a0bb5e7d73..3316c33229 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc @@ -7,9 +7,10 @@ #include "src/regexp/regexp-macro-assembler-irregexp.h" #include "src/ast/ast.h" +#include "src/objects-inl.h" #include "src/regexp/bytecodes-irregexp.h" -#include "src/regexp/regexp-macro-assembler.h" #include "src/regexp/regexp-macro-assembler-irregexp-inl.h" +#include "src/regexp/regexp-macro-assembler.h" namespace v8 { namespace internal { diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc index abdf577209..d311a09e41 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc @@ -5,6 +5,7 @@ #include "src/regexp/regexp-macro-assembler-tracer.h" #include "src/ast/ast.h" +#include "src/objects-inl.h" namespace v8 { namespace internal { diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc index fd3123f674..3035f6a9a9 100644 --- a/deps/v8/src/regexp/regexp-parser.cc +++ b/deps/v8/src/regexp/regexp-parser.cc @@ -13,7 +13,7 @@ #include "src/utils.h" #ifdef V8_I18N_SUPPORT -#include "unicode/uset.h" +#include "unicode/uniset.h" #endif // V8_I18N_SUPPORT namespace v8 { @@ -75,6 +75,7 @@ void RegExpParser::Advance() { if (has_next()) { StackLimitCheck check(isolate()); if (check.HasOverflowed()) { + if (FLAG_abort_on_stack_overflow) FATAL("Aborting on stack overflow"); ReportError(CStrVector( MessageTemplate::TemplateString(MessageTemplate::kStackOverflow))); } else if (zone()->excess_allocation()) { @@ -1082,37 +1083,37 @@ bool IsExactPropertyValueAlias(const char* property_value_name, bool LookupPropertyValueName(UProperty property, const char* property_value_name, bool negate, ZoneList<CharacterRange>* result, Zone* zone) { + UProperty property_for_lookup = property; + if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { + // For the property Script_Extensions, we have to do the property value + // name lookup as if the property is Script. + property_for_lookup = UCHAR_SCRIPT; + } int32_t property_value = - u_getPropertyValueEnum(property, property_value_name); + u_getPropertyValueEnum(property_for_lookup, property_value_name); if (property_value == UCHAR_INVALID_CODE) return false; // We require the property name to match exactly to one of the property value // aliases. However, u_getPropertyValueEnum uses loose matching. - if (!IsExactPropertyValueAlias(property_value_name, property, + if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup, property_value)) { return false; } - USet* set = uset_openEmpty(); UErrorCode ec = U_ZERO_ERROR; - uset_applyIntPropertyValue(set, property, property_value, &ec); - bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); + icu::UnicodeSet set; + set.applyIntPropertyValue(property, property_value, ec); + bool success = ec == U_ZERO_ERROR && !set.isEmpty(); if (success) { - uset_removeAllStrings(set); - if (negate) uset_complement(set); - int item_count = uset_getItemCount(set); - int item_result = 0; - for (int i = 0; i < item_count; i++) { - uc32 start = 0; - uc32 end = 0; - item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); - result->Add(CharacterRange::Range(start, end), zone); + set.removeAllStrings(); + if (negate) set.complement(); + for (int i = 0; i < set.getRangeCount(); i++) { + result->Add( + CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)), + zone); } - DCHECK_EQ(U_ZERO_ERROR, ec); - DCHECK_EQ(0, item_result); } - uset_close(set); return success; } @@ -1196,9 +1197,14 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, const char* property_name = first_part.ToConstVector().start(); const char* value_name = second_part.ToConstVector().start(); UProperty property = u_getPropertyEnum(property_name); - if (property < UCHAR_INT_START) return false; - if (property >= UCHAR_INT_LIMIT) return false; if (!IsExactPropertyAlias(property_name, property)) return false; + if (property == UCHAR_GENERAL_CATEGORY) { + // We want to allow aggregate value names such as "Letter". + property = UCHAR_GENERAL_CATEGORY_MASK; + } else if (property != UCHAR_SCRIPT && + property != UCHAR_SCRIPT_EXTENSIONS) { + return false; + } return LookupPropertyValueName(property, value_name, negate, result, zone()); } @@ -1720,12 +1726,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) { bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) { #ifdef V8_I18N_SUPPORT if (unicode() && ignore_case()) { - USet* set = uset_open(c, c); - uset_closeOver(set, USET_CASE_INSENSITIVE); - uset_removeAllStrings(set); - bool result = uset_size(set) > 1; - uset_close(set); - return result; + icu::UnicodeSet set(c, c); + set.closeOver(USET_CASE_INSENSITIVE); + set.removeAllStrings(); + return set.size() > 1; } // In the case where ICU is not included, we act as if the unicode flag is // not set, and do not desugar. diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc index 62daf3f1d5..d40431866a 100644 --- a/deps/v8/src/regexp/regexp-utils.cc +++ b/deps/v8/src/regexp/regexp-utils.cc @@ -118,12 +118,6 @@ Maybe<bool> RegExpUtils::IsRegExp(Isolate* isolate, Handle<Object> object) { Handle<JSReceiver> receiver = Handle<JSReceiver>::cast(object); - if (isolate->regexp_function()->initial_map() == receiver->map()) { - // Fast-path for unmodified JSRegExp instances. - // TODO(ishell): Adapt for new fast-path logic. - return Just(true); - } - Handle<Object> match; ASSIGN_RETURN_ON_EXCEPTION_VALUE( isolate, match, @@ -180,8 +174,7 @@ MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex( ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj, Object::ToLength(isolate, last_index_obj), Object); - - const int last_index = Handle<Smi>::cast(last_index_obj)->value(); + const int last_index = PositiveNumberToUint32(*last_index_obj); const int new_last_index = AdvanceStringIndex(isolate, string, last_index, unicode); diff --git a/deps/v8/src/regexp/x87/OWNERS b/deps/v8/src/regexp/x87/OWNERS index dd9998b261..61245ae8e2 100644 --- a/deps/v8/src/regexp/x87/OWNERS +++ b/deps/v8/src/regexp/x87/OWNERS @@ -1 +1,2 @@ weiliang.lin@intel.com +chunyang.dai@intel.com |