summary | refs | log | tree | commit | diff
path: root/deps/v8/src/regexp
diff options
context:
space:
mode:
author Michaël Zasso <targos@protonmail.com> 2018-03-07 08:54:53 +0100
committer Michaël Zasso <targos@protonmail.com> 2018-03-07 16:48:52 +0100
commit 88786fecff336342a56e6f2e7ff3b286be716e47 (patch)
tree 92e6ba5b8ac8dae1a058988d20c9d27bfa654390 /deps/v8/src/regexp
parent 4e86f9b5ab83cbabf43839385bf383e6a7ef7d19 (diff)
download node-new-88786fecff336342a56e6f2e7ff3b286be716e47.tar.gz
deps: update V8 to 6.5.254.31
PR-URL: https://github.com/nodejs/node/pull/18453
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Yang Guo <yangguo@chromium.org>
Reviewed-By: Ali Ijaz Sheikh <ofrobots@google.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r-- deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc 35
-rw-r--r-- deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc 36
-rw-r--r-- deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc 32
-rw-r--r-- deps/v8/src/regexp/jsregexp.cc 76
-rw-r--r-- deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc 37
-rw-r--r-- deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc 37
-rw-r--r-- deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc 35
-rw-r--r-- deps/v8/src/regexp/regexp-ast.h 12
-rw-r--r-- deps/v8/src/regexp/regexp-macro-assembler.cc 40
-rw-r--r-- deps/v8/src/regexp/regexp-parser.cc 42
-rw-r--r-- deps/v8/src/regexp/regexp-utils.cc 2
-rw-r--r-- deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc 37
-rw-r--r-- deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc 32
13 files changed, 240 insertions, 213 deletions
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
index 2e6425568b..5f9d3905a3 100644
--- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -86,8 +86,7 @@ namespace internal {
* bool direct_call = false,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in arm/simulator-arm.h.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -506,12 +505,12 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
Label success;
__ cmp(current_character(), Operand(' '));
__ b(eq, &success);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ sub(r0, current_character(), Operand('\t'));
__ cmp(r0, Operand('\r' - '\t'));
__ b(ls, &success);
// \u00a0 (NBSP).
- __ cmp(r0, Operand(0x00a0 - '\t'));
+ __ cmp(r0, Operand(0x00A0 - '\t'));
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
@@ -533,37 +532,37 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(ls, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ eor(r0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(r0, r0, Operand(0x0b));
- __ cmp(r0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(r0, r0, Operand(0x0B));
+ __ cmp(r0, Operand(0x0C - 0x0B));
BranchOrBacktrack(ls, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(r0, r0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(r0, r0, Operand(0x2028 - 0x0B));
__ cmp(r0, Operand(1));
BranchOrBacktrack(ls, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ eor(r0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(r0, r0, Operand(0x0b));
- __ cmp(r0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(r0, r0, Operand(0x0B));
+ __ cmp(r0, Operand(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(hi, on_no_match);
} else {
Label done;
__ b(ls, &done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(r0, r0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(r0, r0, Operand(0x2028 - 0x0B));
__ cmp(r0, Operand(1));
BranchOrBacktrack(hi, on_no_match);
__ bind(&done);
diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
index 558ee673f1..5f77ff4021 100644
--- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -96,8 +96,7 @@ namespace internal {
* bool direct_call = false,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in arm64/simulator-arm64.h.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -116,7 +115,6 @@ RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
success_label_(),
backtrack_label_(),
exit_label_() {
- __ SetStackPointer(csp);
DCHECK_EQ(0, registers_to_save % 2);
// We can cache at most 16 W registers in x0-x7.
STATIC_ASSERT(kNumCachedRegisters <= 16);
@@ -366,7 +364,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
__ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
__ Ccmp(current_input_offset(), 0, NoFlag, eq);
// The current input offset should be <= 0, and fit in a W register.
- __ Check(le, kOffsetOutOfRange);
+ __ Check(le, AbortReason::kOffsetOutOfRange);
}
} else {
DCHECK(mode_ == UC16);
@@ -503,7 +501,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
__ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
__ Ccmp(current_input_offset(), 0, NoFlag, eq);
// The current input offset should be <= 0, and fit in a W register.
- __ Check(le, kOffsetOutOfRange);
+ __ Check(le, AbortReason::kOffsetOutOfRange);
}
__ Bind(&fallthrough);
}
@@ -588,11 +586,11 @@ bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
- // Check for ' ' or 0x00a0.
+ // Check for ' ' or 0x00A0.
__ Cmp(current_character(), ' ');
- __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
+ __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
__ B(eq, &success);
- // Check range 0x09..0x0d.
+ // Check range 0x09..0x0D.
__ Sub(w10, current_character(), '\t');
CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
__ Bind(&success);
@@ -613,12 +611,12 @@ bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
// Here we emit the conditional branch only once at the end to make branch
// prediction more efficient, even though we could branch out of here
// as soon as a character matches.
- __ Cmp(current_character(), 0x0a);
- __ Ccmp(current_character(), 0x0d, ZFlag, ne);
+ __ Cmp(current_character(), 0x0A);
+ __ Ccmp(current_character(), 0x0D, ZFlag, ne);
if (mode_ == UC16) {
__ Sub(w10, current_character(), 0x2028);
// If the Z flag was set we clear the flags to force a branch.
@@ -631,11 +629,11 @@ bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
// We have to check all 4 newline characters before emitting
// the conditional branch.
- __ Cmp(current_character(), 0x0a);
- __ Ccmp(current_character(), 0x0d, ZFlag, ne);
+ __ Cmp(current_character(), 0x0A);
+ __ Ccmp(current_character(), 0x0D, ZFlag, ne);
if (mode_ == UC16) {
__ Sub(w10, current_character(), 0x2028);
// If the Z flag was set we clear the flags to force a fall-through.
@@ -791,7 +789,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
// Check that the size of the input string chars is in range.
__ Neg(x11, x10);
__ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
- __ Check(ls, kInputStringTooLong);
+ __ Check(ls, AbortReason::kInputStringTooLong);
}
__ Mov(current_input_offset(), w10);
@@ -855,7 +853,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
if (masm_->emit_debug_code()) {
// Check that the size of the input string chars is in range.
__ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
- __ Check(ls, kInputStringTooLong);
+ __ Check(ls, AbortReason::kInputStringTooLong);
}
// input_start has a start_offset offset on entry. We need to include
// it when computing the length of the whole string.
@@ -1158,7 +1156,7 @@ void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
if (masm_->emit_debug_code()) {
__ Cmp(x10, kWRegMask);
// The code offset has to fit in a W register.
- __ Check(ls, kOffsetOutOfRange);
+ __ Check(ls, AbortReason::kOffsetOutOfRange);
}
}
Push(w10);
@@ -1314,7 +1312,7 @@ void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
if (masm_->emit_debug_code()) {
__ Cmp(x10, Operand(w10, SXTW));
// The stack offset needs to fit in a W register.
- __ Check(eq, kOffsetOutOfRange);
+ __ Check(eq, AbortReason::kOffsetOutOfRange);
}
StoreRegister(reg, w10);
}
@@ -1623,7 +1621,7 @@ void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
__ Add(x10, x10, Operand(current_input_offset(), SXTW));
__ Cmp(x10, Operand(w10, SXTW));
// The offset needs to fit in a W register.
- __ Check(eq, kOffsetOutOfRange);
+ __ Check(eq, AbortReason::kOffsetOutOfRange);
} else {
__ Add(w10, current_input_offset(), cp_offset * char_size());
}
diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
index 99d1466f54..cb240d6c67 100644
--- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -531,12 +531,12 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
Label success;
__ cmp(current_character(), ' ');
__ j(equal, &success, Label::kNear);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ lea(eax, Operand(current_character(), -'\t'));
__ cmp(eax, '\r' - '\t');
__ j(below_equal, &success, Label::kNear);
// \u00a0 (NBSP).
- __ cmp(eax, 0x00a0 - '\t');
+ __ cmp(eax, 0x00A0 - '\t');
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&success);
return true;
@@ -558,18 +558,18 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ mov(eax, current_character());
__ xor_(eax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(eax, Immediate(0x0b));
- __ cmp(eax, 0x0c - 0x0b);
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(eax, Immediate(0x0B));
+ __ cmp(eax, 0x0C - 0x0B);
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(eax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(eax, Immediate(0x2028 - 0x0B));
__ cmp(eax, 0x2029 - 0x2028);
BranchOrBacktrack(below_equal, on_no_match);
}
@@ -610,13 +610,13 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
// Match any character.
return true;
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 or 0x2029).
// The opposite of '.'.
__ mov(eax, current_character());
__ xor_(eax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(eax, Immediate(0x0b));
- __ cmp(eax, 0x0c - 0x0b);
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(eax, Immediate(0x0B));
+ __ cmp(eax, 0x0C - 0x0B);
if (mode_ == LATIN1) {
BranchOrBacktrack(above, on_no_match);
} else {
@@ -624,9 +624,9 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(below_equal, &done);
DCHECK_EQ(UC16, mode_);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(eax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(eax, Immediate(0x2028 - 0x0B));
__ cmp(eax, 1);
BranchOrBacktrack(above, on_no_match);
__ bind(&done);
diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc
index 9d56e4cfa3..a26a1d77ce 100644
--- a/deps/v8/src/regexp/jsregexp.cc
+++ b/deps/v8/src/regexp/jsregexp.cc
@@ -98,12 +98,36 @@ ContainedInLattice AddRange(ContainedInLattice containment,
return containment;
}
-// Generic RegExp methods. Dispatches to implementation specific methods.
-
+// More makes code generation slower, less makes V8 benchmark score lower.
+const int kMaxLookaheadForBoyerMoore = 8;
// In a 3-character pattern you can maximally step forwards 3 characters
// at a time, which is not always enough to pay for the extra logic.
const int kPatternTooShortForBoyerMoore = 2;
+// Identifies the sort of regexps where the regexp engine is faster
+// than the code used for atom matches.
+static bool HasFewDifferentCharacters(Handle<String> pattern) {
+ int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
+ if (length <= kPatternTooShortForBoyerMoore) return false;
+ const int kMod = 128;
+ bool character_found[kMod];
+ int different = 0;
+ memset(&character_found[0], 0, sizeof(character_found));
+ for (int i = 0; i < length; i++) {
+ int ch = (pattern->Get(i) & (kMod - 1));
+ if (!character_found[ch]) {
+ character_found[ch] = true;
+ different++;
+ // We declare a regexp low-alphabet if it has at least 3 times as many
+ // characters as it has different characters.
+ if (different * 3 > length) return false;
+ }
+ }
+ return true;
+}
+
+// Generic RegExp methods. Dispatches to implementation specific methods.
+
MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
@@ -133,7 +157,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
bool has_been_compiled = false;
if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
- pattern->length() <= kPatternTooShortForBoyerMoore) {
+ !HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
@@ -141,12 +165,11 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
- if (!IgnoreCase(atom->flags()) &&
- atom_pattern.length() <= kPatternTooShortForBoyerMoore) {
- Handle<String> atom_string;
- ASSIGN_RETURN_ON_EXCEPTION(
- isolate, atom_string,
- isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
+ Handle<String> atom_string;
+ ASSIGN_RETURN_ON_EXCEPTION(
+ isolate, atom_string,
+ isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
+ if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
AtomCompile(re, pattern, flags, atom_string);
has_been_compiled = true;
}
@@ -2433,8 +2456,8 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
} else {
// For 2-character preloads in one-byte mode or 1-character preloads in
// two-byte mode we also use a 16 bit load with zero extend.
- static const uint32_t kTwoByteMask = 0xffff;
- static const uint32_t kFourByteMask = 0xffffffff;
+ static const uint32_t kTwoByteMask = 0xFFFF;
+ static const uint32_t kFourByteMask = 0xFFFFFFFF;
if (details->characters() == 2 && compiler->one_byte()) {
if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
} else if (details->characters() == 1 && !compiler->one_byte()) {
@@ -2554,6 +2577,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
details->positions(characters_filled_in);
RegExpCharacterClass* tree = elm.char_class();
ZoneList<CharacterRange>* ranges = tree->ranges(zone());
+ DCHECK(!ranges->is_empty());
if (tree->is_negated()) {
// A quick check uses multi-character mask and compare. There is no
// useful way to incorporate a negative char class into this scheme
@@ -2716,12 +2740,11 @@ RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
return set_replacement(this);
}
-
-// We need to check for the following characters: 0x39c 0x3bc 0x178.
+// We need to check for the following characters: 0x39C 0x3BC 0x178.
static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
// TODO(dcarney): this could be a lot more efficient.
- return range.Contains(0x39c) ||
- range.Contains(0x3bc) || range.Contains(0x178);
+ return range.Contains(0x039C) || range.Contains(0x03BC) ||
+ range.Contains(0x0178);
}
@@ -2973,7 +2996,7 @@ static void EmitHat(RegExpCompiler* compiler,
new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
if (!compiler->one_byte()) {
- assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
+ assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
}
assembler->CheckCharacter('\n', &ok);
assembler->CheckNotCharacter('\r', new_trace.backtrack());
@@ -2982,8 +3005,6 @@ static void EmitHat(RegExpCompiler* compiler,
on_success->Emit(compiler, &new_trace);
}
-// More makes code generation slower, less makes V8 benchmark score lower.
-const int kMaxLookaheadForBoyerMoore = 8;
// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
@@ -3253,9 +3274,9 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
JSRegExp::Flags flags) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
- elms->Add(
- TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, flags)),
- zone);
+ elms->Add(TextElement::CharClass(
+ new (zone) RegExpCharacterClass(zone, ranges, flags)),
+ zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -3268,10 +3289,10 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(lead_ranges, flags)),
+ new (zone) RegExpCharacterClass(zone, lead_ranges, flags)),
zone);
elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(trail_ranges, flags)),
+ new (zone) RegExpCharacterClass(zone, trail_ranges, flags)),
zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -5089,10 +5110,9 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
ranges = negated;
}
if (ranges->length() == 0) {
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- ranges->Add(CharacterRange::Everything(), zone);
+ JSRegExp::Flags default_flags;
RegExpCharacterClass* fail =
- new (zone) RegExpCharacterClass(ranges, default_flags, NEGATED);
+ new (zone) RegExpCharacterClass(zone, ranges, default_flags);
return new (zone) TextNode(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
@@ -5346,8 +5366,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (IsUnicode(flags) && contains_trail_surrogate) {
character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
}
- alternatives->at(write_posn++) =
- new (zone) RegExpCharacterClass(ranges, flags, character_class_flags);
+ alternatives->at(write_posn++) = new (zone)
+ RegExpCharacterClass(zone, ranges, flags, character_class_flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
index e45eeeb492..89046a56f3 100644
--- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
+++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
@@ -85,8 +85,7 @@ namespace internal {
* bool direct_call = false,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in mips/simulator-mips.h.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -509,11 +508,11 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type,
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
- // Check range 0x09..0x0d.
+ // Check range 0x09..0x0D.
__ Subu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
- BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00a0 - '\t'));
+ BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
return true;
}
@@ -532,34 +531,34 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Subu(a0, a0, Operand(0x0b));
- BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Subu(a0, a0, Operand(0x0B));
+ BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Subu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Subu(a0, a0, Operand(0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Subu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
- BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
Label done;
- BranchOrBacktrack(&done, ls, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Subu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
}
diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
index 68a7f87843..841b2931fe 100644
--- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
+++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
@@ -120,8 +120,7 @@ namespace internal {
* bool direct_call = false,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in mips/simulator-mips.h.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*
* clang-format on
*/
@@ -540,11 +539,11 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type,
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
- // Check range 0x09..0x0d.
+ // Check range 0x09..0x0D.
__ Dsubu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
- BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00a0 - '\t'));
+ BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
return true;
}
@@ -563,34 +562,34 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Dsubu(a0, a0, Operand(0x0b));
- BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Dsubu(a0, a0, Operand(0x0B));
+ BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Dsubu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Dsubu(a0, a0, Operand(0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Dsubu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
- BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
Label done;
- BranchOrBacktrack(&done, ls, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Dsubu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
}
diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index bc3e643369..1187fc04b8 100644
--- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -86,8 +86,7 @@ namespace internal {
* bool direct_call = false,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in ppc/simulator-ppc.h.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -522,12 +521,12 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(uc16 type,
Label success;
__ cmpi(current_character(), Operand(' '));
__ beq(&success);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ subi(r3, current_character(), Operand('\t'));
__ cmpli(r3, Operand('\r' - '\t'));
__ ble(&success);
// \u00a0 (NBSP).
- __ cmpi(r3, Operand(0x00a0 - '\t'));
+ __ cmpi(r3, Operand(0x00A0 - '\t'));
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
@@ -549,37 +548,37 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(le, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subi(r3, r3, Operand(0x0b));
- __ cmpli(r3, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subi(r3, r3, Operand(0x0B));
+ __ cmpli(r3, Operand(0x0C - 0x0B));
BranchOrBacktrack(le, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subi(r3, r3, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subi(r3, r3, Operand(0x2028 - 0x0B));
__ cmpli(r3, Operand(1));
BranchOrBacktrack(le, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subi(r3, r3, Operand(0x0b));
- __ cmpli(r3, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subi(r3, r3, Operand(0x0B));
+ __ cmpli(r3, Operand(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(gt, on_no_match);
} else {
Label done;
__ ble(&done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subi(r3, r3, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subi(r3, r3, Operand(0x2028 - 0x0B));
__ cmpli(r3, Operand(1));
BranchOrBacktrack(gt, on_no_match);
__ bind(&done);
diff --git a/deps/v8/src/regexp/regexp-ast.h b/deps/v8/src/regexp/regexp-ast.h
index e60621f8b6..1a94832f71 100644
--- a/deps/v8/src/regexp/regexp-ast.h
+++ b/deps/v8/src/regexp/regexp-ast.h
@@ -306,11 +306,17 @@ class RegExpCharacterClass final : public RegExpTree {
typedef base::Flags<Flag> CharacterClassFlags;
RegExpCharacterClass(
- ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
+ Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
CharacterClassFlags character_class_flags = CharacterClassFlags())
: set_(ranges),
flags_(flags),
- character_class_flags_(character_class_flags) {}
+ character_class_flags_(character_class_flags) {
+ // Convert the empty set of ranges to the negated Everything() range.
+ if (ranges->is_empty()) {
+ ranges->Add(CharacterRange::Everything(), zone);
+ character_class_flags_ ^= NEGATED;
+ }
+ }
RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
: set_(type),
flags_(flags),
@@ -352,7 +358,7 @@ class RegExpCharacterClass final : public RegExpTree {
private:
CharacterSet set_;
const JSRegExp::Flags flags_;
- const CharacterClassFlags character_class_flags_;
+ CharacterClassFlags character_class_flags_;
};
diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc
index 600757a72b..af285abcb0 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler.cc
@@ -286,9 +286,15 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
Address stack_base = stack_scope.stack()->stack_base();
int direct_call = 0;
- int result = CALL_GENERATED_REGEXP_CODE(
- isolate, code->entry(), input, start_offset, input_start, input_end,
- output, output_size, stack_base, direct_call, isolate);
+
+ using RegexpMatcherSig = int(
+ String * input, int start_offset, // NOLINT(readability/casting)
+ const byte* input_start, const byte* input_end, int* output,
+ int output_size, Address stack_base, int direct_call, Isolate* isolate);
+
+ auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
+ int result = fn.Call(input, start_offset, input_start, input_end, output,
+ output_size, stack_base, direct_call, isolate);
DCHECK(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
@@ -299,7 +305,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
return static_cast<Result>(result);
}
-
+// clang-format off
const byte NativeRegExpMacroAssembler::word_character_map[] = {
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
@@ -308,18 +314,18 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = {
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
- 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
-
- 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
- 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
-
- 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
- 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // '0' - '7'
+ 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
+
+ 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'A' - 'G'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'H' - 'O'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'P' - 'W'
+ 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu, // 'X' - 'Z', '_'
+
+ 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'a' - 'g'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'h' - 'o'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'p' - 'w'
+ 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
// Latin-1 range
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
@@ -341,7 +347,7 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = {
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
};
-
+// clang-format on
Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
Address* stack_base,
diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc
index 2c6aa5b23a..a7724c5d42 100644
--- a/deps/v8/src/regexp/regexp-parser.cc
+++ b/deps/v8/src/regexp/regexp-parser.cc
@@ -280,12 +280,12 @@ RegExpTree* RegExpParser::ParseDisjunction() {
// Everything.
CharacterRange::AddClassEscape('*', ranges, false, zone());
} else {
- // Everything except \x0a, \x0d, \u2028 and \u2029
+ // Everything except \x0A, \x0D, \u2028 and \u2029
CharacterRange::AddClassEscape('.', ranges, false, zone());
}
RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, builder->flags());
+ new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
@@ -332,8 +332,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
new (zone()) ZoneList<CharacterRange>(2, zone());
CharacterRange::AddClassEscape(
c, ranges, unicode() && builder->ignore_case(), zone());
- RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, builder->flags());
+ RegExpCharacterClass* cc = new (zone())
+ RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
@@ -348,8 +348,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
if (!ParsePropertyClass(ranges, p == 'P')) {
return ReportError(CStrVector("Invalid property name"));
}
- RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, builder->flags());
+ RegExpCharacterClass* cc = new (zone())
+ RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
} else {
// With /u, no identity escapes except for syntax characters
@@ -451,7 +451,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
builder->AddCharacter('\\');
} else {
Advance(2);
- builder->AddCharacter(controlLetter & 0x1f);
+ builder->AddCharacter(controlLetter & 0x1F);
}
break;
}
@@ -1145,7 +1145,7 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) {
if (current() == '{' && unicode()) {
int start = position();
Advance();
- if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) {
+ if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) {
if (current() == '}') {
Advance();
return true;
@@ -1255,10 +1255,15 @@ bool LookupSpecialPropertyValueName(const char* name,
ZoneList<CharacterRange>* result,
bool negate, Zone* zone) {
if (NameEquals(name, "Any")) {
- if (!negate) result->Add(CharacterRange::Everything(), zone);
+ if (negate) {
+ // Leave the list of character ranges empty, since the negation of 'Any'
+ // is the empty set.
+ } else {
+ result->Add(CharacterRange::Everything(), zone);
+ }
} else if (NameEquals(name, "ASCII")) {
result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
- : CharacterRange::Range(0x0, 0x7f),
+ : CharacterRange::Range(0x0, 0x7F),
zone);
} else if (NameEquals(name, "Assigned")) {
return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
@@ -1486,8 +1491,8 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
if (letter >= 'A' && letter <= 'Z') {
Advance(2);
// Control letters mapped to ASCII control characters in the range
- // 0x00-0x1f.
- return controlLetter & 0x1f;
+ // 0x00-0x1F.
+ return controlLetter & 0x1F;
}
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
@@ -1497,7 +1502,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
if ((controlLetter >= '0' && controlLetter <= '9') ||
controlLetter == '_') {
Advance(2);
- return controlLetter & 0x1f;
+ return controlLetter & 0x1F;
}
// We match JSC in reading the backslash as a literal
// character instead of as starting an escape.
@@ -1672,14 +1677,10 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
return ReportError(CStrVector(kUnterminated));
}
Advance();
- if (ranges->length() == 0) {
- ranges->Add(CharacterRange::Everything(), zone());
- is_negated = !is_negated;
- }
RegExpCharacterClass::CharacterClassFlags character_class_flags;
if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
- return new (zone())
- RegExpCharacterClass(ranges, builder->flags(), character_class_flags);
+ return new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags(),
+ character_class_flags);
}
@@ -1853,7 +1854,8 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
AddTerm(new (zone()) RegExpCharacterClass(
- CharacterRange::List(zone(), CharacterRange::Singleton(c)), flags_));
+ zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)),
+ flags_));
}
diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc
index 16427e2933..d483125dd6 100644
--- a/deps/v8/src/regexp/regexp-utils.cc
+++ b/deps/v8/src/regexp/regexp-utils.cc
@@ -134,7 +134,7 @@ bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
// TODO(ishell): Update this check once map changes for constant field
// tracking are landing.
-#if defined(DEBUG) || defined(ENABLE_SLOWFAST_SWITCH)
+#ifdef V8_ENABLE_FORCE_SLOW_PATH
if (isolate->force_slow_path()) return false;
#endif
diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
index fc9548fc78..4f8f234171 100644
--- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
+++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
@@ -88,8 +88,7 @@ namespace internal {
* bool direct_call = false,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in s390/simulator-s390.h.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -493,12 +492,12 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(uc16 type,
Label success;
__ CmpP(current_character(), Operand(' '));
__ beq(&success);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ SubP(r2, current_character(), Operand('\t'));
__ CmpLogicalP(r2, Operand('\r' - '\t'));
__ ble(&success);
// \u00a0 (NBSP).
- __ CmpLogicalP(r2, Operand(0x00a0 - '\t'));
+ __ CmpLogicalP(r2, Operand(0x00A0 - '\t'));
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
@@ -520,37 +519,37 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(le, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ SubP(r2, Operand(0x0b));
- __ CmpLogicalP(r2, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ SubP(r2, Operand(0x0B));
+ __ CmpLogicalP(r2, Operand(0x0C - 0x0B));
BranchOrBacktrack(le, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ SubP(r2, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ SubP(r2, Operand(0x2028 - 0x0B));
__ CmpLogicalP(r2, Operand(1));
BranchOrBacktrack(le, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ SubP(r2, Operand(0x0b));
- __ CmpLogicalP(r2, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ SubP(r2, Operand(0x0B));
+ __ CmpLogicalP(r2, Operand(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(gt, on_no_match);
} else {
Label done;
__ ble(&done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ SubP(r2, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ SubP(r2, Operand(0x2028 - 0x0B));
__ CmpLogicalP(r2, Operand(1));
BranchOrBacktrack(gt, on_no_match);
__ bind(&done);
@@ -773,7 +772,7 @@ Handle<HeapObject> RegExpMacroAssemblerS390::GetCode(Handle<String> source) {
// and the following use of that register.
__ lay(r2, MemOperand(r2, num_saved_registers_ * kIntSize));
for (int i = 0; i < num_saved_registers_;) {
- if (false && i < num_saved_registers_ - 4) {
+ if ((false) && i < num_saved_registers_ - 4) {
// TODO(john.yan): Can be optimized by SIMD instructions
__ LoadMultipleP(r3, r6, register_location(i + 3));
if (mode_ == UC16) {
diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
index 1e21182c35..eb57b29602 100644
--- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -551,12 +551,12 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
Label success;
__ cmpl(current_character(), Immediate(' '));
__ j(equal, &success, Label::kNear);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ leap(rax, Operand(current_character(), -'\t'));
__ cmpl(rax, Immediate('\r' - '\t'));
__ j(below_equal, &success, Label::kNear);
// \u00a0 (NBSP).
- __ cmpl(rax, Immediate(0x00a0 - '\t'));
+ __ cmpl(rax, Immediate(0x00A0 - '\t'));
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&success);
return true;
@@ -578,39 +578,39 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ movl(rax, current_character());
__ xorp(rax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subl(rax, Immediate(0x0b));
- __ cmpl(rax, Immediate(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subl(rax, Immediate(0x0B));
+ __ cmpl(rax, Immediate(0x0C - 0x0B));
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subl(rax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subl(rax, Immediate(0x2028 - 0x0B));
__ cmpl(rax, Immediate(0x2029 - 0x2028));
BranchOrBacktrack(below_equal, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ movl(rax, current_character());
__ xorp(rax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subl(rax, Immediate(0x0b));
- __ cmpl(rax, Immediate(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subl(rax, Immediate(0x0B));
+ __ cmpl(rax, Immediate(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(above, on_no_match);
} else {
Label done;
BranchOrBacktrack(below_equal, &done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subl(rax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subl(rax, Immediate(0x2028 - 0x0B));
__ cmpl(rax, Immediate(0x2029 - 0x2028));
BranchOrBacktrack(above, on_no_match);
__ bind(&done);