summary | refs | log | tree | commit | diff
path: root/deps/v8/src/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r-- deps/v8/src/regexp/interpreter-irregexp.cc | 1
-rw-r--r-- deps/v8/src/regexp/jsregexp.cc | 31
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc | 3
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler-tracer.cc | 1
-rw-r--r-- deps/v8/src/regexp/regexp-parser.cc | 56
-rw-r--r-- deps/v8/src/regexp/regexp-utils.cc | 9
-rw-r--r-- deps/v8/src/regexp/x87/OWNERS | 1
7 files changed, 48 insertions, 54 deletions
diff --git a/deps/v8/src/regexp/interpreter-irregexp.cc b/deps/v8/src/regexp/interpreter-irregexp.cc
index 14834d512a..4f8f96a536 100644
--- a/deps/v8/src/regexp/interpreter-irregexp.cc
+++ b/deps/v8/src/regexp/interpreter-irregexp.cc
@@ -9,6 +9,7 @@
#include "src/regexp/interpreter-irregexp.h"
#include "src/ast/ast.h"
+#include "src/objects-inl.h"
#include "src/regexp/bytecodes-irregexp.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc
index f0abc9a8b3..8b21459059 100644
--- a/deps/v8/src/regexp/jsregexp.cc
+++ b/deps/v8/src/regexp/jsregexp.cc
@@ -27,7 +27,7 @@
#include "src/unicode-decoder.h"
#ifdef V8_I18N_SUPPORT
-#include "unicode/uset.h"
+#include "unicode/uniset.h"
#include "unicode/utypes.h"
#endif // V8_I18N_SUPPORT
@@ -451,7 +451,7 @@ void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject) {
- subject = String::Flatten(subject);
+ DCHECK(subject->IsFlat());
// Check representation of the underlying storage.
bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
@@ -565,6 +565,8 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
Isolate* isolate = regexp->GetIsolate();
DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+ subject = String::Flatten(subject);
+
// Prepare space for the return values.
#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
if (FLAG_trace_regexp_bytecodes) {
@@ -5114,30 +5116,22 @@ void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
// Use ICU to compute the case fold closure over the ranges.
DCHECK(compiler->unicode());
DCHECK(compiler->ignore_case());
- USet* set = uset_openEmpty();
+ icu::UnicodeSet set;
for (int i = 0; i < ranges->length(); i++) {
- uset_addRange(set, ranges->at(i).from(), ranges->at(i).to());
+ set.add(ranges->at(i).from(), ranges->at(i).to());
}
ranges->Clear();
- uset_closeOver(set, USET_CASE_INSENSITIVE);
+ set.closeOver(USET_CASE_INSENSITIVE);
// Full case mapping map single characters to multiple characters.
// Those are represented as strings in the set. Remove them so that
// we end up with only simple and common case mappings.
- uset_removeAllStrings(set);
- int item_count = uset_getItemCount(set);
- int item_result = 0;
- UErrorCode ec = U_ZERO_ERROR;
+ set.removeAllStrings();
Zone* zone = compiler->zone();
- for (int i = 0; i < item_count; i++) {
- uc32 start = 0;
- uc32 end = 0;
- item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
- ranges->Add(CharacterRange::Range(start, end), zone);
+ for (int i = 0; i < set.getRangeCount(); i++) {
+ ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
+ zone);
}
// No errors and everything we collected have been ranges.
- DCHECK_EQ(U_ZERO_ERROR, ec);
- DCHECK_EQ(0, item_result);
- uset_close(set);
#else
// Fallback if ICU is not included.
CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
@@ -6742,8 +6736,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
// Inserted here, instead of in Assembler, because it depends on information
// in the AST that isn't replicated in the Node structure.
static const int kMaxBacksearchLimit = 1024;
- if (is_end_anchored &&
- !is_start_anchored &&
+ if (is_end_anchored && !is_start_anchored && !is_sticky &&
max_length < kMaxBacksearchLimit) {
macro_assembler.SetCurrentPositionFromEnd(max_length);
}
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc
index a0bb5e7d73..3316c33229 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc
@@ -7,9 +7,10 @@
#include "src/regexp/regexp-macro-assembler-irregexp.h"
#include "src/ast/ast.h"
+#include "src/objects-inl.h"
#include "src/regexp/bytecodes-irregexp.h"
-#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-macro-assembler-irregexp-inl.h"
+#include "src/regexp/regexp-macro-assembler.h"
namespace v8 {
namespace internal {
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
index abdf577209..d311a09e41 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
@@ -5,6 +5,7 @@
#include "src/regexp/regexp-macro-assembler-tracer.h"
#include "src/ast/ast.h"
+#include "src/objects-inl.h"
namespace v8 {
namespace internal {
diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc
index fd3123f674..3035f6a9a9 100644
--- a/deps/v8/src/regexp/regexp-parser.cc
+++ b/deps/v8/src/regexp/regexp-parser.cc
@@ -13,7 +13,7 @@
#include "src/utils.h"
#ifdef V8_I18N_SUPPORT
-#include "unicode/uset.h"
+#include "unicode/uniset.h"
#endif // V8_I18N_SUPPORT
namespace v8 {
@@ -75,6 +75,7 @@ void RegExpParser::Advance() {
if (has_next()) {
StackLimitCheck check(isolate());
if (check.HasOverflowed()) {
+ if (FLAG_abort_on_stack_overflow) FATAL("Aborting on stack overflow");
ReportError(CStrVector(
MessageTemplate::TemplateString(MessageTemplate::kStackOverflow)));
} else if (zone()->excess_allocation()) {
@@ -1082,37 +1083,37 @@ bool IsExactPropertyValueAlias(const char* property_value_name,
bool LookupPropertyValueName(UProperty property,
const char* property_value_name, bool negate,
ZoneList<CharacterRange>* result, Zone* zone) {
+ UProperty property_for_lookup = property;
+ if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
+ // For the property Script_Extensions, we have to do the property value
+ // name lookup as if the property is Script.
+ property_for_lookup = UCHAR_SCRIPT;
+ }
int32_t property_value =
- u_getPropertyValueEnum(property, property_value_name);
+ u_getPropertyValueEnum(property_for_lookup, property_value_name);
if (property_value == UCHAR_INVALID_CODE) return false;
// We require the property name to match exactly to one of the property value
// aliases. However, u_getPropertyValueEnum uses loose matching.
- if (!IsExactPropertyValueAlias(property_value_name, property,
+ if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup,
property_value)) {
return false;
}
- USet* set = uset_openEmpty();
UErrorCode ec = U_ZERO_ERROR;
- uset_applyIntPropertyValue(set, property, property_value, &ec);
- bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set);
+ icu::UnicodeSet set;
+ set.applyIntPropertyValue(property, property_value, ec);
+ bool success = ec == U_ZERO_ERROR && !set.isEmpty();
if (success) {
- uset_removeAllStrings(set);
- if (negate) uset_complement(set);
- int item_count = uset_getItemCount(set);
- int item_result = 0;
- for (int i = 0; i < item_count; i++) {
- uc32 start = 0;
- uc32 end = 0;
- item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
- result->Add(CharacterRange::Range(start, end), zone);
+ set.removeAllStrings();
+ if (negate) set.complement();
+ for (int i = 0; i < set.getRangeCount(); i++) {
+ result->Add(
+ CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
+ zone);
}
- DCHECK_EQ(U_ZERO_ERROR, ec);
- DCHECK_EQ(0, item_result);
}
- uset_close(set);
return success;
}
@@ -1196,9 +1197,14 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
const char* property_name = first_part.ToConstVector().start();
const char* value_name = second_part.ToConstVector().start();
UProperty property = u_getPropertyEnum(property_name);
- if (property < UCHAR_INT_START) return false;
- if (property >= UCHAR_INT_LIMIT) return false;
if (!IsExactPropertyAlias(property_name, property)) return false;
+ if (property == UCHAR_GENERAL_CATEGORY) {
+ // We want to allow aggregate value names such as "Letter".
+ property = UCHAR_GENERAL_CATEGORY_MASK;
+ } else if (property != UCHAR_SCRIPT &&
+ property != UCHAR_SCRIPT_EXTENSIONS) {
+ return false;
+ }
return LookupPropertyValueName(property, value_name, negate, result,
zone());
}
@@ -1720,12 +1726,10 @@ bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
#ifdef V8_I18N_SUPPORT
if (unicode() && ignore_case()) {
- USet* set = uset_open(c, c);
- uset_closeOver(set, USET_CASE_INSENSITIVE);
- uset_removeAllStrings(set);
- bool result = uset_size(set) > 1;
- uset_close(set);
- return result;
+ icu::UnicodeSet set(c, c);
+ set.closeOver(USET_CASE_INSENSITIVE);
+ set.removeAllStrings();
+ return set.size() > 1;
}
// In the case where ICU is not included, we act as if the unicode flag is
// not set, and do not desugar.
diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc
index 62daf3f1d5..d40431866a 100644
--- a/deps/v8/src/regexp/regexp-utils.cc
+++ b/deps/v8/src/regexp/regexp-utils.cc
@@ -118,12 +118,6 @@ Maybe<bool> RegExpUtils::IsRegExp(Isolate* isolate, Handle<Object> object) {
Handle<JSReceiver> receiver = Handle<JSReceiver>::cast(object);
- if (isolate->regexp_function()->initial_map() == receiver->map()) {
- // Fast-path for unmodified JSRegExp instances.
- // TODO(ishell): Adapt for new fast-path logic.
- return Just(true);
- }
-
Handle<Object> match;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
isolate, match,
@@ -180,8 +174,7 @@ MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj), Object);
-
- const int last_index = Handle<Smi>::cast(last_index_obj)->value();
+ const int last_index = PositiveNumberToUint32(*last_index_obj);
const int new_last_index =
AdvanceStringIndex(isolate, string, last_index, unicode);
diff --git a/deps/v8/src/regexp/x87/OWNERS b/deps/v8/src/regexp/x87/OWNERS
index dd9998b261..61245ae8e2 100644
--- a/deps/v8/src/regexp/x87/OWNERS
+++ b/deps/v8/src/regexp/x87/OWNERS
@@ -1 +1,2 @@
weiliang.lin@intel.com
+chunyang.dai@intel.com