summaryrefslogtreecommitdiff
path: root/chromium/v8/src/regexp
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/v8/src/regexp')
-rw-r--r--chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.cc7
-rw-r--r--chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.h2
-rw-r--r--chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc7
-rw-r--r--chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h2
-rw-r--r--chromium/v8/src/regexp/gen-regexp-special-case.cc5
-rw-r--r--chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc7
-rw-r--r--chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h2
-rw-r--r--chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.cc7
-rw-r--r--chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.h2
-rw-r--r--chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc7
-rw-r--r--chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h2
-rw-r--r--chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc7
-rw-r--r--chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h2
-rw-r--r--chromium/v8/src/regexp/regexp-ast.h5
-rw-r--r--chromium/v8/src/regexp/regexp-bytecode-generator.cc10
-rw-r--r--chromium/v8/src/regexp/regexp-bytecode-generator.h1
-rw-r--r--chromium/v8/src/regexp/regexp-bytecode-peephole.cc19
-rw-r--r--chromium/v8/src/regexp/regexp-bytecodes.h16
-rw-r--r--chromium/v8/src/regexp/regexp-compiler-tonode.cc18
-rw-r--r--chromium/v8/src/regexp/regexp-compiler.cc139
-rw-r--r--chromium/v8/src/regexp/regexp-compiler.h4
-rw-r--r--chromium/v8/src/regexp/regexp-dotprinter.cc2
-rw-r--r--chromium/v8/src/regexp/regexp-interpreter.cc200
-rw-r--r--chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc8
-rw-r--r--chromium/v8/src/regexp/regexp-macro-assembler-tracer.h1
-rw-r--r--chromium/v8/src/regexp/regexp-macro-assembler.cc41
-rw-r--r--chromium/v8/src/regexp/regexp-macro-assembler.h15
-rw-r--r--chromium/v8/src/regexp/regexp-parser.cc58
-rw-r--r--chromium/v8/src/regexp/regexp-parser.h7
-rw-r--r--chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.cc7
-rw-r--r--chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.h2
-rw-r--r--chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.cc7
-rw-r--r--chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.h1
33 files changed, 383 insertions, 237 deletions
diff --git a/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
index 10dad83c28c..aaee9b196c6 100644
--- a/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -224,7 +224,7 @@ void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ ldr(r0, register_location(start_reg)); // Index of start of capture
__ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
@@ -335,7 +335,10 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(function, argument_count);
}
diff --git a/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.h
index 549636a6744..910e5c46079 100644
--- a/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.h
+++ b/chromium/v8/src/regexp/arm/regexp-macro-assembler-arm.h
@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
diff --git a/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
index 055f5639f5b..b56a8ac709c 100644
--- a/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -294,7 +294,7 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
Register capture_start_offset = w10;
@@ -425,7 +425,10 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(function, argument_count);
}
diff --git a/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
index 2b5feb1dbdc..aeb49aa9fff 100644
--- a/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
+++ b/chromium/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
@@ -42,7 +42,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
diff --git a/chromium/v8/src/regexp/gen-regexp-special-case.cc b/chromium/v8/src/regexp/gen-regexp-special-case.cc
index 9606c5d70d9..9ed338fc1d8 100644
--- a/chromium/v8/src/regexp/gen-regexp-special-case.cc
+++ b/chromium/v8/src/regexp/gen-regexp-special-case.cc
@@ -55,8 +55,9 @@ void PrintSpecial(std::ofstream& out) {
CHECK(U_SUCCESS(status));
// Iterate through all chars in BMP except surrogates.
- for (UChar32 i = 0; i < kNonBmpStart; i++) {
- if (i >= kSurrogateStart && i <= kSurrogateEnd) {
+ for (UChar32 i = 0; i < static_cast<UChar32>(kNonBmpStart); i++) {
+ if (i >= static_cast<UChar32>(kSurrogateStart) &&
+ i <= static_cast<UChar32>(kSurrogateEnd)) {
continue; // Ignore surrogate range
}
current.set(i, i);
diff --git a/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
index 501a0aff604..f439ae7de07 100644
--- a/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -206,7 +206,7 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ mov(edx, register_location(start_reg)); // Index of start of capture
__ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
@@ -336,7 +336,10 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference compare =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(compare, argument_count);
}
// Pop original values before reacting on result value.
diff --git a/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
index 2339ca57e15..a30bff29a15 100644
--- a/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
+++ b/chromium/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
diff --git a/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
index 5f8eb4c6d33..a6289254457 100644
--- a/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
+++ b/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
@@ -226,7 +226,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ lw(a0, register_location(start_reg)); // Index of start of capture.
__ lw(a1, register_location(start_reg + 1)); // Index of end of capture.
@@ -340,7 +340,10 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
- ExternalReference::re_case_insensitive_compare_uc16(masm_->isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(function, argument_count);
}
diff --git a/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.h b/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.h
index cafa7851803..e2aea1b0910 100644
--- a/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.h
+++ b/chromium/v8/src/regexp/mips/regexp-macro-assembler-mips.h
@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
diff --git a/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
index c443c8da467..e79038b00b7 100644
--- a/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
+++ b/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
@@ -262,7 +262,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ Ld(a0, register_location(start_reg)); // Index of start of capture.
__ Ld(a1, register_location(start_reg + 1)); // Index of end of capture.
@@ -376,7 +376,10 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
- ExternalReference::re_case_insensitive_compare_uc16(masm_->isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(function, argument_count);
}
diff --git a/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
index 161a01e2fca..aebfec10604 100644
--- a/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
+++ b/chromium/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
@@ -37,7 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
diff --git a/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index 5a6eb315103..9db26777d31 100644
--- a/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -242,7 +242,7 @@ void RegExpMacroAssemblerPPC::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ LoadP(r3, register_location(start_reg), r0); // Index of start of capture
__ LoadP(r4, register_location(start_reg + 1), r0); // Index of end
@@ -356,7 +356,10 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(function, argument_count);
}
diff --git a/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
index 598691d9883..f6b959837fc 100644
--- a/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
+++ b/chromium/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
@@ -36,7 +36,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
diff --git a/chromium/v8/src/regexp/regexp-ast.h b/chromium/v8/src/regexp/regexp-ast.h
index a9106d3d304..643e1fc983f 100644
--- a/chromium/v8/src/regexp/regexp-ast.h
+++ b/chromium/v8/src/regexp/regexp-ast.h
@@ -76,9 +76,8 @@ class Interval {
int to_;
};
-
-// Represents code units in the range from from_ to to_, both ends are
-// inclusive.
+// Represents code points (with values up to 0x10FFFF) in the range from from_
+// to to_, both ends are inclusive.
class CharacterRange {
public:
CharacterRange() : from_(0), to_(0) {}
diff --git a/chromium/v8/src/regexp/regexp-bytecode-generator.cc b/chromium/v8/src/regexp/regexp-bytecode-generator.cc
index e82b67b530a..8abd15384e7 100644
--- a/chromium/v8/src/regexp/regexp-bytecode-generator.cc
+++ b/chromium/v8/src/regexp/regexp-bytecode-generator.cc
@@ -182,7 +182,7 @@ void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
int eats_at_least) {
DCHECK_GE(eats_at_least, characters);
if (eats_at_least > characters && check_bounds) {
- DCHECK(is_uint24(cp_offset + eats_at_least));
+ DCHECK(is_int24(cp_offset + eats_at_least));
Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
EmitOrLink(on_failure);
check_bounds = false; // Load below doesn't need to check.
@@ -329,11 +329,13 @@ void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg,
}
void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_not_equal) {
+ int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
DCHECK_LE(0, start_reg);
DCHECK_GE(kMaxRegister, start_reg);
- Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
- : BC_CHECK_NOT_BACK_REF_NO_CASE,
+ Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
+ : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
+ : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
+ : BC_CHECK_NOT_BACK_REF_NO_CASE),
start_reg);
EmitOrLink(on_not_equal);
}
diff --git a/chromium/v8/src/regexp/regexp-bytecode-generator.h b/chromium/v8/src/regexp/regexp-bytecode-generator.h
index fdb9b468619..9c4b6057c23 100644
--- a/chromium/v8/src/regexp/regexp-bytecode-generator.h
+++ b/chromium/v8/src/regexp/regexp-bytecode-generator.h
@@ -69,6 +69,7 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ bool unicode,
Label* on_no_match) override;
void IfRegisterLT(int register_index, int comparand, Label* if_lt) override;
void IfRegisterGE(int register_index, int comparand, Label* if_ge) override;
diff --git a/chromium/v8/src/regexp/regexp-bytecode-peephole.cc b/chromium/v8/src/regexp/regexp-bytecode-peephole.cc
index f0957f0779a..dcbafac334f 100644
--- a/chromium/v8/src/regexp/regexp-bytecode-peephole.cc
+++ b/chromium/v8/src/regexp/regexp-bytecode-peephole.cc
@@ -187,7 +187,8 @@ class RegExpBytecodePeephole {
BytecodeSequenceNode& CreateSequence(int bytecode);
// Checks for optimization candidates at pc and emits optimized bytecode to
// the internal buffer. Returns the length of replaced bytecodes in bytes.
- int TryOptimizeSequence(const byte* bytecode, int start_pc);
+ int TryOptimizeSequence(const byte* bytecode, int bytecode_length,
+ int start_pc);
// Emits optimized bytecode to the internal buffer. start_pc points to the
// start of the sequence in bytecode and last_node is the last
// BytecodeSequenceNode of the matching sequence found.
@@ -626,7 +627,7 @@ bool RegExpBytecodePeephole::OptimizeBytecode(const byte* bytecode,
bool did_optimize = false;
while (old_pc < length) {
- int replaced_len = TryOptimizeSequence(bytecode, old_pc);
+ int replaced_len = TryOptimizeSequence(bytecode, length, old_pc);
if (replaced_len > 0) {
old_pc += replaced_len;
did_optimize = true;
@@ -659,6 +660,7 @@ BytecodeSequenceNode& RegExpBytecodePeephole::CreateSequence(int bytecode) {
}
int RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode,
+ int bytecode_length,
int start_pc) {
BytecodeSequenceNode* seq_node = sequences_;
BytecodeSequenceNode* valid_seq_end = nullptr;
@@ -667,13 +669,12 @@ int RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode,
// Check for the longest valid sequence matching any of the pre-defined
// sequences in the Trie data structure.
- while ((seq_node = seq_node->Find(bytecode[current_pc]))) {
- if (!seq_node->CheckArguments(bytecode, start_pc)) {
- break;
- }
- if (seq_node->IsSequence()) {
- valid_seq_end = seq_node;
- }
+ while (current_pc < bytecode_length) {
+ seq_node = seq_node->Find(bytecode[current_pc]);
+ if (seq_node == nullptr) break;
+ if (!seq_node->CheckArguments(bytecode, start_pc)) break;
+
+ if (seq_node->IsSequence()) valid_seq_end = seq_node;
current_pc += RegExpBytecodeLength(bytecode[current_pc]);
}
diff --git a/chromium/v8/src/regexp/regexp-bytecodes.h b/chromium/v8/src/regexp/regexp-bytecodes.h
index 1664a476d29..e3248d7b837 100644
--- a/chromium/v8/src/regexp/regexp-bytecodes.h
+++ b/chromium/v8/src/regexp/regexp-bytecodes.h
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_BYTECODES_H_
#define V8_REGEXP_REGEXP_BYTECODES_H_
+#include "src/base/bounds.h"
#include "src/base/macros.h"
#include "src/common/globals.h"
@@ -27,6 +28,7 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
// name or at least by position.
+// TODO(jgruber): More precise types (e.g. int32/uint32 instead of value32).
#define BYTECODE_ITERATOR(V) \
V(BREAK, 0, 4) /* bc8 */ \
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
@@ -101,12 +103,12 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
- V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) /* UNUSED */ \
+ V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
+ V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
+ V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) \
V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) /* UNUSED */ \
+ V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) \
V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \
@@ -229,16 +231,18 @@ static constexpr int kRegExpBytecodeLengths[] = {
};
inline constexpr int RegExpBytecodeLength(int bytecode) {
+ CONSTEXPR_DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1));
return kRegExpBytecodeLengths[bytecode];
}
-static const char* const kRegExpBytecodeNames[] = {
+static constexpr const char* const kRegExpBytecodeNames[] = {
#define DECLARE_BYTECODE_NAME(name, ...) #name,
BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME)
#undef DECLARE_BYTECODE_NAME
};
-inline const char* RegExpBytecodeName(int bytecode) {
+inline constexpr const char* RegExpBytecodeName(int bytecode) {
+ CONSTEXPR_DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1));
return kRegExpBytecodeNames[bytecode];
}
diff --git a/chromium/v8/src/regexp/regexp-compiler-tonode.cc b/chromium/v8/src/regexp/regexp-compiler-tonode.cc
index 9496de83e10..5fd53390797 100644
--- a/chromium/v8/src/regexp/regexp-compiler-tonode.cc
+++ b/chromium/v8/src/regexp/regexp-compiler-tonode.cc
@@ -56,11 +56,11 @@ static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
return false;
}
for (int i = 0; i < length; i += 2) {
- if (special_class[i] != (range.to() + 1)) {
+ if (static_cast<uc32>(special_class[i]) != (range.to() + 1)) {
return false;
}
range = ranges->at((i >> 1) + 1);
- if (special_class[i + 1] != range.from()) {
+ if (static_cast<uc32>(special_class[i + 1]) != range.from()) {
return false;
}
}
@@ -79,8 +79,8 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges,
}
for (int i = 0; i < length; i += 2) {
CharacterRange range = ranges->at(i >> 1);
- if (range.from() != special_class[i] ||
- range.to() != special_class[i + 1] - 1) {
+ if (range.from() != static_cast<uc32>(special_class[i]) ||
+ range.to() != static_cast<uc32>(special_class[i + 1] - 1)) {
return false;
}
}
@@ -1154,7 +1154,7 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
CharacterRange range = ranges->at(i);
uc32 from = range.from();
if (from > String::kMaxUtf16CodeUnit) continue;
- uc32 to = Min(range.to(), String::kMaxUtf16CodeUnit);
+ uc32 to = Min(range.to(), String::kMaxUtf16CodeUnitU);
// Nothing to be done for surrogates.
if (from >= kLeadSurrogateStart && to <= kTrailSurrogateEnd) continue;
if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
@@ -1197,7 +1197,7 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
CharacterRange range = ranges->at(i);
uc32 bottom = range.from();
if (bottom > String::kMaxUtf16CodeUnit) continue;
- uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
+ uc32 top = Min(range.to(), String::kMaxUtf16CodeUnitU);
// Nothing to be done for surrogates.
if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
@@ -1232,7 +1232,7 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
// block we do this for all the blocks covered by the range (handling
// characters that is not in a block as a "singleton block").
unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int pos = bottom;
+ uc32 pos = bottom;
while (pos <= top) {
int length =
isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
@@ -1265,7 +1265,7 @@ bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
DCHECK_NOT_NULL(ranges);
int n = ranges->length();
if (n <= 1) return true;
- int max = ranges->at(0).to();
+ uc32 max = ranges->at(0).to();
for (int i = 1; i < n; i++) {
CharacterRange next_range = ranges->at(i);
if (next_range.from() <= max + 1) return false;
@@ -1366,7 +1366,7 @@ void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
// Check whether ranges are already canonical (increasing, non-overlapping,
// non-adjacent).
int n = character_ranges->length();
- int max = character_ranges->at(0).to();
+ uc32 max = character_ranges->at(0).to();
int i = 1;
while (i < n) {
CharacterRange current = character_ranges->at(i);
diff --git a/chromium/v8/src/regexp/regexp-compiler.cc b/chromium/v8/src/regexp/regexp-compiler.cc
index a04180fd346..58d598ca768 100644
--- a/chromium/v8/src/regexp/regexp-compiler.cc
+++ b/chromium/v8/src/regexp/regexp-compiler.cc
@@ -174,6 +174,24 @@ using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
// trace is not recorded in the node and so it cannot currently be reused in
// the event that code generation is requested for an identical trace.
+namespace {
+
+constexpr uc32 MaxCodeUnit(const bool one_byte) {
+ STATIC_ASSERT(String::kMaxOneByteCharCodeU <=
+ std::numeric_limits<uint16_t>::max());
+ STATIC_ASSERT(String::kMaxUtf16CodeUnitU <=
+ std::numeric_limits<uint16_t>::max());
+ return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU;
+}
+
+constexpr uint32_t CharMask(const bool one_byte) {
+ STATIC_ASSERT(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1));
+ STATIC_ASSERT(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1));
+ return MaxCodeUnit(one_byte);
+}
+
+} // namespace
+
void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE(); }
void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
@@ -386,9 +404,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
int pushes = 0;
for (int reg = 0; reg <= max_register; reg++) {
- if (!affected_registers.Get(reg)) {
- continue;
- }
+ if (!affected_registers.Get(reg)) continue;
// The chronologically first deferred action in the trace
// is used to infer the action needed to restore a register
@@ -710,6 +726,20 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
}
}
+namespace {
+
+#ifdef DEBUG
+bool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
+ STATIC_ASSERT(sizeof(unibrow::uchar) == 4);
+ for (int i = 0; i < length; i++) {
+ if (chars[i] > String::kMaxUtf16CodeUnit) return false;
+ }
+ return true;
+}
+#endif // DEBUG
+
+} // namespace
+
// Returns the number of characters in the equivalence class, omitting those
// that cannot occur in the source string because it is Latin1.
static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
@@ -719,6 +749,7 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
#ifdef V8_INTL_SUPPORT
if (RegExpCaseFolding::IgnoreSet().contains(character)) {
letters[0] = character;
+ DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1));
return 1;
}
bool in_special_add_set =
@@ -744,9 +775,10 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) {
continue;
}
- letters[items++] = (unibrow::uchar)(cu);
+ letters[items++] = static_cast<unibrow::uchar>(cu);
}
}
+ DCHECK(ContainsOnlyUtf16CodeUnits(letters, items));
return items;
#else
int length =
@@ -768,6 +800,7 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
length = new_length;
}
+ DCHECK(ContainsOnlyUtf16CodeUnits(letters, length));
return length;
#endif // V8_INTL_SUPPORT
}
@@ -820,12 +853,7 @@ static inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
bool one_byte, uc16 c1, uc16 c2,
Label* on_failure) {
- uc16 char_mask;
- if (one_byte) {
- char_mask = String::kMaxOneByteCharCode;
- } else {
- char_mask = String::kMaxUtf16CodeUnit;
- }
+ const uint32_t char_mask = CharMask(one_byte);
uc16 exor = c1 ^ c2;
// Check whether exor has only one bit set.
if (((exor - 1) & exor) == 0) {
@@ -1126,7 +1154,7 @@ static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
return;
}
- if ((min_char >> kBits) != (first >> kBits)) {
+ if ((min_char >> kBits) != static_cast<uc32>(first >> kBits)) {
masm->CheckCharacterLT(first, odd_label);
GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
fall_through, odd_label, even_label);
@@ -1185,21 +1213,13 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
ZoneList<CharacterRange>* ranges = cc->ranges(zone);
CharacterRange::Canonicalize(ranges);
- int max_char;
- if (one_byte) {
- max_char = String::kMaxOneByteCharCode;
- } else {
- max_char = String::kMaxUtf16CodeUnit;
- }
-
+ const uc32 max_char = MaxCodeUnit(one_byte);
int range_count = ranges->length();
int last_valid_range = range_count - 1;
while (last_valid_range >= 0) {
CharacterRange& range = ranges->at(last_valid_range);
- if (range.from() <= max_char) {
- break;
- }
+ if (range.from() <= max_char) break;
last_valid_range--;
}
@@ -1240,6 +1260,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
// entry at zero which goes to the failure label, but if there
// was already one there we fall through for success on that entry.
// Subsequent entries have alternating meaning (success/failure).
+ // TODO(jgruber,v8:10568): Change `range_boundaries` to a ZoneList<uc32>.
ZoneList<int>* range_boundaries =
new (zone) ZoneList<int>(last_valid_range, zone);
@@ -1256,7 +1277,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
range_boundaries->Add(range.to() + 1, zone);
}
int end_index = range_boundaries->length() - 1;
- if (range_boundaries->at(end_index) > max_char) {
+ if (static_cast<uc32>(range_boundaries->at(end_index)) > max_char) {
end_index--;
}
@@ -1370,12 +1391,7 @@ static inline uint32_t SmearBitsRight(uint32_t v) {
bool QuickCheckDetails::Rationalize(bool asc) {
bool found_useful_op = false;
- uint32_t char_mask;
- if (asc) {
- char_mask = String::kMaxOneByteCharCode;
- } else {
- char_mask = String::kMaxUtf16CodeUnit;
- }
+ const uint32_t char_mask = CharMask(asc);
mask_ = 0;
value_ = 0;
int char_shift = 0;
@@ -1495,12 +1511,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
if (details->characters() == 1) {
// If number of characters preloaded is 1 then we used a byte or 16 bit
// load so the value is already masked down.
- uint32_t char_mask;
- if (compiler->one_byte()) {
- char_mask = String::kMaxOneByteCharCode;
- } else {
- char_mask = String::kMaxUtf16CodeUnit;
- }
+ const uint32_t char_mask = CharMask(compiler->one_byte());
if ((mask & char_mask) == char_mask) need_mask = false;
mask &= char_mask;
} else {
@@ -1551,12 +1562,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
Isolate* isolate = compiler->macro_assembler()->isolate();
DCHECK(characters_filled_in < details->characters());
int characters = details->characters();
- int char_mask;
- if (compiler->one_byte()) {
- char_mask = String::kMaxOneByteCharCode;
- } else {
- char_mask = String::kMaxUtf16CodeUnit;
- }
+ const uint32_t char_mask = CharMask(compiler->one_byte());
for (int k = 0; k < elements()->length(); k++) {
TextElement elm = elements()->at(k);
if (elm.text_type() == TextElement::ATOM) {
@@ -1645,26 +1651,22 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
}
CharacterRange range = ranges->at(first_range);
- uc16 from = range.from();
- uc16 to = range.to();
- if (to > char_mask) {
- to = char_mask;
- }
- uint32_t differing_bits = (from ^ to);
+ const uc32 first_from = range.from();
+ const uc32 first_to = (range.to() > char_mask) ? char_mask : range.to();
+ const uint32_t differing_bits = (first_from ^ first_to);
// A mask and compare is only perfect if the differing bits form a
// number like 00011111 with one single block of trailing 1s.
if ((differing_bits & (differing_bits + 1)) == 0 &&
- from + differing_bits == to) {
+ first_from + differing_bits == first_to) {
pos->determines_perfectly = true;
}
uint32_t common_bits = ~SmearBitsRight(differing_bits);
- uint32_t bits = (from & common_bits);
+ uint32_t bits = (first_from & common_bits);
for (int i = first_range + 1; i < ranges->length(); i++) {
CharacterRange range = ranges->at(i);
- uc16 from = range.from();
- uc16 to = range.to();
+ const uc32 from = range.from();
if (from > char_mask) continue;
- if (to > char_mask) to = char_mask;
+ const uc32 to = (range.to() > char_mask) ? char_mask : range.to();
// Here we are combining more ranges into the mask and compare
// value. With each new range the mask becomes more sparse and
// so the chances of a false positive rise. A character class
@@ -1684,9 +1686,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
characters_filled_in++;
DCHECK(characters_filled_in <= details->characters());
- if (characters_filled_in == details->characters()) {
- return;
- }
+ if (characters_filled_in == details->characters()) return;
}
}
DCHECK(characters_filled_in != details->characters());
@@ -1748,7 +1748,7 @@ void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
pos->mask &= other_pos->mask;
pos->value &= pos->mask;
other_pos->value &= pos->mask;
- uc16 differing_bits = (pos->value ^ other_pos->value);
+ uint32_t differing_bits = (pos->value ^ other_pos->value);
pos->mask &= ~differing_bits;
pos->value &= pos->mask;
}
@@ -1858,16 +1858,20 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
if (range_count != 0 && ranges->at(0).from() == 0 &&
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
+ if (IgnoreCase(cc->flags()) &&
+ RangesContainLatin1Equivalents(ranges)) {
continue;
+ }
return set_replacement(nullptr);
}
} else {
if (range_count == 0 ||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
+ if (IgnoreCase(cc->flags()) &&
+ RangesContainLatin1Equivalents(ranges)) {
continue;
+ }
return set_replacement(nullptr);
}
}
@@ -2504,12 +2508,7 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
return ranges->length() == 0 ? on_success() : nullptr;
}
if (ranges->length() != 1) return nullptr;
- uint32_t max_char;
- if (compiler->one_byte()) {
- max_char = String::kMaxOneByteCharCode;
- } else {
- max_char = String::kMaxUtf16CodeUnit;
- }
+ const uc32 max_char = MaxCodeUnit(compiler->one_byte());
return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
}
@@ -2719,12 +2718,9 @@ void BoyerMoorePositionInfo::SetAll() {
BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
Zone* zone)
- : length_(length), compiler_(compiler) {
- if (compiler->one_byte()) {
- max_char_ = String::kMaxOneByteCharCode;
- } else {
- max_char_ = String::kMaxUtf16CodeUnit;
- }
+ : length_(length),
+ compiler_(compiler),
+ max_char_(MaxCodeUnit(compiler->one_byte())) {
bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
for (int i = 0; i < length; i++) {
bitmaps_->Add(new (zone) BoyerMoorePositionInfo(), zone);
@@ -3421,8 +3417,9 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
DCHECK_EQ(start_reg_ + 1, end_reg_);
if (IgnoreCase(flags_)) {
+ bool unicode = IsUnicode(flags_);
assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
- trace->backtrack());
+ unicode, trace->backtrack());
} else {
assembler->CheckNotBackReference(start_reg_, read_backward(),
trace->backtrack());
@@ -3787,7 +3784,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
} else {
for (int k = 0; k < ranges->length(); k++) {
CharacterRange& range = ranges->at(k);
- if (range.from() > max_char) continue;
+ if (static_cast<int>(range.from()) > max_char) continue;
int to = Min(max_char, static_cast<int>(range.to()));
bm->SetInterval(offset, Interval(range.from(), to));
}
diff --git a/chromium/v8/src/regexp/regexp-compiler.h b/chromium/v8/src/regexp/regexp-compiler.h
index a35ffcd01a2..4e7652883c4 100644
--- a/chromium/v8/src/regexp/regexp-compiler.h
+++ b/chromium/v8/src/regexp/regexp-compiler.h
@@ -96,8 +96,8 @@ class QuickCheckDetails {
void set_cannot_match() { cannot_match_ = true; }
struct Position {
Position() : mask(0), value(0), determines_perfectly(false) {}
- uc16 mask;
- uc16 value;
+ uc32 mask;
+ uc32 value;
bool determines_perfectly;
};
int characters() { return characters_; }
diff --git a/chromium/v8/src/regexp/regexp-dotprinter.cc b/chromium/v8/src/regexp/regexp-dotprinter.cc
index b6640626f2c..7cf1e82c4d0 100644
--- a/chromium/v8/src/regexp/regexp-dotprinter.cc
+++ b/chromium/v8/src/regexp/regexp-dotprinter.cc
@@ -143,7 +143,7 @@ void DotPrinterImpl::VisitText(TextNode* that) {
if (node->is_negated()) os_ << "^";
for (int j = 0; j < node->ranges(zone)->length(); j++) {
CharacterRange range = node->ranges(zone)->at(j);
- os_ << AsUC16(range.from()) << "-" << AsUC16(range.to());
+ os_ << AsUC32(range.from()) << "-" << AsUC32(range.to());
}
os_ << "]";
break;
diff --git a/chromium/v8/src/regexp/regexp-interpreter.cc b/chromium/v8/src/regexp/regexp-interpreter.cc
index 0c6d8d5b4be..49215a25446 100644
--- a/chromium/v8/src/regexp/regexp-interpreter.cc
+++ b/chromium/v8/src/regexp/regexp-interpreter.cc
@@ -35,18 +35,23 @@ namespace internal {
namespace {
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
- Vector<const uc16> subject) {
+ Vector<const uc16> subject, bool unicode) {
Address offset_a =
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
Address offset_b =
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
size_t length = len * kUC16Size;
- return RegExpMacroAssembler::CaseInsensitiveCompareUC16(offset_a, offset_b,
- length, isolate) == 1;
+
+ bool result = unicode
+ ? RegExpMacroAssembler::CaseInsensitiveCompareUnicode(
+ offset_a, offset_b, length, isolate)
+ : RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
+ offset_a, offset_b, length, isolate);
+ return result == 1;
}
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
- Vector<const uint8_t> subject) {
+ Vector<const uint8_t> subject, bool unicode) {
// For Latin1 characters the unicode flag makes no difference.
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
@@ -100,6 +105,18 @@ int32_t Load16AlignedSigned(const byte* pc) {
return *reinterpret_cast<const int16_t*>(pc);
}
+// Helpers to access the packed argument. Takes the 32 bits containing the
+// current bytecode, where the 8 LSB contain the bytecode and the rest contains
+// a packed 24-bit argument.
+// TODO(jgruber): Specify signed-ness in bytecode signature declarations, and
+// police restrictions during bytecode generation.
+int32_t LoadPacked24Signed(int32_t bytecode_and_packed_arg) {
+ return bytecode_and_packed_arg >> BYTECODE_SHIFT;
+}
+uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) {
+ return static_cast<uint32_t>(bytecode_and_packed_arg) >> BYTECODE_SHIFT;
+}
+
// A simple abstraction over the backtracking stack used by the interpreter.
//
// Despite the name 'backtracking' stack, it's actually used as a generic stack
@@ -296,6 +313,12 @@ bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
return (b & (1 << bit)) != 0;
}
+// Returns true iff 0 <= index < length.
+bool IndexIsInBounds(int index, int length) {
+ DCHECK_GE(length, 0);
+ return static_cast<uintptr_t>(index) < static_cast<uintptr_t>(length);
+}
+
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
@@ -337,6 +360,14 @@ bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
next_pc = code_base + offset; \
DECODE()
+// Current position mutations.
+#define SET_CURRENT_POSITION(value) \
+ do { \
+ current = (value); \
+ DCHECK(base::IsInRange(current, 0, subject.length())); \
+ } while (false)
+#define ADVANCE_CURRENT_POSITION(by) SET_CURRENT_POSITION(current + (by))
+
#ifdef DEBUG
#define BYTECODE(name) \
BC_LABEL(name) \
@@ -447,44 +478,44 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(PUSH_REGISTER) {
ADVANCE(PUSH_REGISTER);
- if (!backtrack_stack.push(registers[insn >> BYTECODE_SHIFT])) {
+ if (!backtrack_stack.push(registers[LoadPacked24Unsigned(insn)])) {
return MaybeThrowStackOverflow(isolate, call_origin);
}
DISPATCH();
}
BYTECODE(SET_REGISTER) {
ADVANCE(SET_REGISTER);
- registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
+ registers[LoadPacked24Unsigned(insn)] = Load32Aligned(pc + 4);
DISPATCH();
}
BYTECODE(ADVANCE_REGISTER) {
ADVANCE(ADVANCE_REGISTER);
- registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
+ registers[LoadPacked24Unsigned(insn)] += Load32Aligned(pc + 4);
DISPATCH();
}
BYTECODE(SET_REGISTER_TO_CP) {
ADVANCE(SET_REGISTER_TO_CP);
- registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
+ registers[LoadPacked24Unsigned(insn)] = current + Load32Aligned(pc + 4);
DISPATCH();
}
BYTECODE(SET_CP_TO_REGISTER) {
ADVANCE(SET_CP_TO_REGISTER);
- current = registers[insn >> BYTECODE_SHIFT];
+ SET_CURRENT_POSITION(registers[LoadPacked24Unsigned(insn)]);
DISPATCH();
}
BYTECODE(SET_REGISTER_TO_SP) {
ADVANCE(SET_REGISTER_TO_SP);
- registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp();
+ registers[LoadPacked24Unsigned(insn)] = backtrack_stack.sp();
DISPATCH();
}
BYTECODE(SET_SP_TO_REGISTER) {
ADVANCE(SET_SP_TO_REGISTER);
- backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]);
+ backtrack_stack.set_sp(registers[LoadPacked24Unsigned(insn)]);
DISPATCH();
}
BYTECODE(POP_CP) {
ADVANCE(POP_CP);
- current = backtrack_stack.pop();
+ SET_CURRENT_POSITION(backtrack_stack.pop());
DISPATCH();
}
BYTECODE(POP_BT) {
@@ -504,7 +535,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(POP_REGISTER) {
ADVANCE(POP_REGISTER);
- registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop();
+ registers[LoadPacked24Unsigned(insn)] = backtrack_stack.pop();
DISPATCH();
}
BYTECODE(FAIL) {
@@ -520,7 +551,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(ADVANCE_CP) {
ADVANCE(ADVANCE_CP);
- current += insn >> BYTECODE_SHIFT;
+ ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
DISPATCH();
}
BYTECODE(GOTO) {
@@ -529,7 +560,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(ADVANCE_CP_AND_GOTO) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- current += insn >> BYTECODE_SHIFT;
+ ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
DISPATCH();
}
BYTECODE(CHECK_GREEDY) {
@@ -542,7 +573,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(LOAD_CURRENT_CHAR) {
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
if (pos >= subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -553,12 +584,12 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
current_char = subject[pos];
DISPATCH();
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
if (pos + 2 > subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -570,14 +601,14 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
Char next = subject[pos + 1];
current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
DISPATCH();
}
BYTECODE(LOAD_4_CURRENT_CHARS) {
DCHECK_EQ(1, sizeof(Char));
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
if (pos + 4 > subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -593,7 +624,7 @@ IrregexpInterpreter::Result RawMatch(
BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
DCHECK_EQ(1, sizeof(Char));
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
Char next1 = subject[pos + 1];
Char next2 = subject[pos + 2];
Char next3 = subject[pos + 3];
@@ -611,7 +642,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t c = LoadPacked24Unsigned(insn);
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -629,7 +660,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t c = LoadPacked24Unsigned(insn);
if (c != current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -647,7 +678,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(AND_CHECK_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t c = LoadPacked24Unsigned(insn);
if (c == (current_char & Load32Aligned(pc + 4))) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
@@ -665,7 +696,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(AND_CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t c = LoadPacked24Unsigned(insn);
if (c != (current_char & Load32Aligned(pc + 4))) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
@@ -674,7 +705,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
+ uint32_t c = LoadPacked24Unsigned(insn);
uint32_t minus = Load16Aligned(pc + 4);
uint32_t mask = Load16Aligned(pc + 6);
if (c != ((current_char - minus) & mask)) {
@@ -713,7 +744,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_LT) {
- uint32_t limit = (insn >> BYTECODE_SHIFT);
+ uint32_t limit = LoadPacked24Unsigned(insn);
if (current_char < limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -722,7 +753,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_GT) {
- uint32_t limit = (insn >> BYTECODE_SHIFT);
+ uint32_t limit = LoadPacked24Unsigned(insn);
if (current_char > limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -731,7 +762,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_REGISTER_LT) {
- if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
+ if (registers[LoadPacked24Unsigned(insn)] < Load32Aligned(pc + 4)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
ADVANCE(CHECK_REGISTER_LT);
@@ -739,7 +770,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_REGISTER_GE) {
- if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
+ if (registers[LoadPacked24Unsigned(insn)] >= Load32Aligned(pc + 4)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
ADVANCE(CHECK_REGISTER_GE);
@@ -747,7 +778,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_REGISTER_EQ_POS) {
- if (registers[insn >> BYTECODE_SHIFT] == current) {
+ if (registers[LoadPacked24Unsigned(insn)] == current) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_REGISTER_EQ_POS);
@@ -755,7 +786,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_REGS_EQUAL) {
- if (registers[insn >> BYTECODE_SHIFT] ==
+ if (registers[LoadPacked24Unsigned(insn)] ==
registers[Load32Aligned(pc + 4)]) {
ADVANCE(CHECK_NOT_REGS_EQUAL);
} else {
@@ -764,69 +795,94 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ int from = registers[LoadPacked24Unsigned(insn)];
+ int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
CompareChars(&subject[from], &subject[current], len) != 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
- current += len;
+ ADVANCE_CURRENT_POSITION(len);
}
ADVANCE(CHECK_NOT_BACK_REF);
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ int from = registers[LoadPacked24Unsigned(insn)];
+ int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
CompareChars(&subject[from], &subject[current - len], len) != 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
- current -= len;
+ SET_CURRENT_POSITION(current - len);
}
ADVANCE(CHECK_NOT_BACK_REF_BACKWARD);
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
- UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
+ int from = registers[LoadPacked24Unsigned(insn)];
+ int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current + len > subject.length() ||
+ !BackRefMatchesNoCase(isolate, from, current, len, subject, true)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
+ }
+ ADVANCE_CURRENT_POSITION(len);
+ }
+ ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE);
+ DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ int from = registers[LoadPacked24Unsigned(insn)];
+ int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
- !BackRefMatchesNoCase(isolate, from, current, len, subject)) {
+ !BackRefMatchesNoCase(isolate, from, current, len, subject,
+ false)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
- current += len;
+ ADVANCE_CURRENT_POSITION(len);
}
ADVANCE(CHECK_NOT_BACK_REF_NO_CASE);
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
- UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
+ int from = registers[LoadPacked24Unsigned(insn)];
+ int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current - len < 0 ||
+ !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
+ true)) {
+ SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
+ DISPATCH();
+ }
+ SET_CURRENT_POSITION(current - len);
+ }
+ ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD);
+ DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ int from = registers[LoadPacked24Unsigned(insn)];
+ int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
- !BackRefMatchesNoCase(isolate, from, current - len, len, subject)) {
+ !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
+ false)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
DISPATCH();
}
- current -= len;
+ SET_CURRENT_POSITION(current - len);
}
ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD);
DISPATCH();
}
BYTECODE(CHECK_AT_START) {
- if (current + (insn >> BYTECODE_SHIFT) == 0) {
+ if (current + LoadPacked24Signed(insn) == 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_AT_START);
@@ -834,7 +890,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_AT_START) {
- if (current + (insn >> BYTECODE_SHIFT) == 0) {
+ if (current + LoadPacked24Signed(insn) == 0) {
ADVANCE(CHECK_NOT_AT_START);
} else {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
@@ -843,15 +899,15 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(SET_CURRENT_POSITION_FROM_END) {
ADVANCE(SET_CURRENT_POSITION_FROM_END);
- int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
+ int by = LoadPacked24Unsigned(insn);
if (subject.length() - current > by) {
- current = subject.length() - by;
+ SET_CURRENT_POSITION(subject.length() - by);
current_char = subject[current - 1];
}
DISPATCH();
}
BYTECODE(CHECK_CURRENT_POSITION) {
- int pos = current + (insn >> BYTECODE_SHIFT);
+ int pos = current + LoadPacked24Signed(insn);
if (pos > subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@@ -860,23 +916,22 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR) {
- int load_offset = (insn >> BYTECODE_SHIFT);
+ int32_t load_offset = LoadPacked24Signed(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint32_t c = Load16Aligned(pc + 6);
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
+ while (IndexIsInBounds(current + load_offset, subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
DISPATCH();
}
- current += advance;
+ ADVANCE_CURRENT_POSITION(advance);
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_AND) {
- int load_offset = (insn >> BYTECODE_SHIFT);
+ int32_t load_offset = LoadPacked24Signed(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
uint32_t mask = Load32Aligned(pc + 8);
@@ -888,13 +943,13 @@ IrregexpInterpreter::Result RawMatch(
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
- current += advance;
+ ADVANCE_CURRENT_POSITION(advance);
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
- int load_offset = (insn >> BYTECODE_SHIFT);
+ int32_t load_offset = LoadPacked24Signed(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
int32_t maximum_offset = Load32Aligned(pc + 8);
@@ -905,34 +960,32 @@ IrregexpInterpreter::Result RawMatch(
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
- current += advance;
+ ADVANCE_CURRENT_POSITION(advance);
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
- int load_offset = (insn >> BYTECODE_SHIFT);
+ int32_t load_offset = LoadPacked24Signed(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
const byte* table = pc + 8;
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
+ while (IndexIsInBounds(current + load_offset, subject.length())) {
current_char = subject[current + load_offset];
if (CheckBitInTable(current_char, table)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
- current += advance;
+ ADVANCE_CURRENT_POSITION(advance);
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
- int load_offset = (insn >> BYTECODE_SHIFT);
+ int32_t load_offset = LoadPacked24Signed(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint16_t limit = Load16Aligned(pc + 6);
const byte* table = pc + 8;
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
+ while (IndexIsInBounds(current + load_offset, subject.length())) {
current_char = subject[current + load_offset];
if (current_char > limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
@@ -942,18 +995,17 @@ IrregexpInterpreter::Result RawMatch(
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
- current += advance;
+ ADVANCE_CURRENT_POSITION(advance);
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
- int load_offset = (insn >> BYTECODE_SHIFT);
+ int32_t load_offset = LoadPacked24Signed(insn);
int32_t advance = Load32Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 8);
uint16_t c2 = Load16Aligned(pc + 10);
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
+ while (IndexIsInBounds(current + load_offset, subject.length())) {
current_char = subject[current + load_offset];
// The two if-statements below are split up intentionally, as combining
// them seems to result in register allocation behaving quite
@@ -966,7 +1018,7 @@ IrregexpInterpreter::Result RawMatch(
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
- current += advance;
+ ADVANCE_CURRENT_POSITION(advance);
}
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
@@ -986,6 +1038,8 @@ IrregexpInterpreter::Result RawMatch(
}
#undef BYTECODE
+#undef ADVANCE_CURRENT_POSITION
+#undef SET_CURRENT_POSITION
#undef DISPATCH
#undef DECODE
#undef SET_PC_FROM_OFFSET
diff --git a/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc b/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc
index 0a122017437..d1feec4c33d 100644
--- a/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc
+++ b/chromium/v8/src/regexp/regexp-macro-assembler-tracer.cc
@@ -352,11 +352,11 @@ void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
}
void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
- PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
+ PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s %s, label[%08x]);\n",
start_reg, read_backward ? "backward" : "forward",
- LabelToInt(on_no_match));
- assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
+ unicode ? "unicode" : "non-unicode", LabelToInt(on_no_match));
+ assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward, unicode,
on_no_match);
}
diff --git a/chromium/v8/src/regexp/regexp-macro-assembler-tracer.h b/chromium/v8/src/regexp/regexp-macro-assembler-tracer.h
index b6ad63071f4..2a44146e738 100644
--- a/chromium/v8/src/regexp/regexp-macro-assembler-tracer.h
+++ b/chromium/v8/src/regexp/regexp-macro-assembler-tracer.h
@@ -33,6 +33,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ bool unicode,
Label* on_no_match) override;
void CheckNotCharacter(unsigned c, Label* on_not_equal) override;
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with,
diff --git a/chromium/v8/src/regexp/regexp-macro-assembler.cc b/chromium/v8/src/regexp/regexp-macro-assembler.cc
index 6cc9cae6e1d..cf4346309eb 100644
--- a/chromium/v8/src/regexp/regexp-macro-assembler.cc
+++ b/chromium/v8/src/regexp/regexp-macro-assembler.cc
@@ -9,6 +9,7 @@
#include "src/execution/pointer-authentication.h"
#include "src/execution/simulator.h"
#include "src/regexp/regexp-stack.h"
+#include "src/regexp/special-case.h"
#include "src/strings/unicode-inl.h"
#ifdef V8_INTL_SUPPORT
@@ -27,17 +28,46 @@ RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
RegExpMacroAssembler::~RegExpMacroAssembler() = default;
-int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
- Address byte_offset2,
- size_t byte_length,
- Isolate* isolate) {
+int RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(Address byte_offset1,
+ Address byte_offset2,
+ size_t byte_length,
+ Isolate* isolate) {
+#ifdef V8_INTL_SUPPORT
+ // This function is not allowed to cause a garbage collection.
+ // A GC might move the calling generated code and invalidate the
+ // return address on the stack.
+ DisallowHeapAllocation no_gc;
+ DCHECK_EQ(0, byte_length % 2);
+ size_t length = byte_length / 2;
+ uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
+ uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
+
+ for (size_t i = 0; i < length; i++) {
+ UChar32 c1 = RegExpCaseFolding::Canonicalize(substring1[i]);
+ UChar32 c2 = RegExpCaseFolding::Canonicalize(substring2[i]);
+ if (c1 != c2) {
+ return 0;
+ }
+ }
+ return 1;
+#else
+ return CaseInsensitiveCompareUnicode(byte_offset1, byte_offset2, byte_length,
+ isolate);
+#endif
+}
+
+int RegExpMacroAssembler::CaseInsensitiveCompareUnicode(Address byte_offset1,
+ Address byte_offset2,
+ size_t byte_length,
+ Isolate* isolate) {
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
+ DisallowHeapAllocation no_gc;
DCHECK_EQ(0, byte_length % 2);
#ifdef V8_INTL_SUPPORT
- int32_t length = (int32_t)(byte_length >> 1);
+ int32_t length = static_cast<int32_t>(byte_length >> 1);
icu::UnicodeString uni_str_1(reinterpret_cast<const char16_t*>(byte_offset1),
length);
return uni_str_1.caseCompare(reinterpret_cast<const char16_t*>(byte_offset2),
@@ -68,7 +98,6 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
#endif // V8_INTL_SUPPORT
}
-
void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
Label* on_failure) {
Label ok;
diff --git a/chromium/v8/src/regexp/regexp-macro-assembler.h b/chromium/v8/src/regexp/regexp-macro-assembler.h
index 289c2a979e6..52465610cb6 100644
--- a/chromium/v8/src/regexp/regexp-macro-assembler.h
+++ b/chromium/v8/src/regexp/regexp-macro-assembler.h
@@ -88,7 +88,7 @@ class RegExpMacroAssembler {
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@@ -165,11 +165,16 @@ class RegExpMacroAssembler {
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
- // Compares two-byte strings case insensitively.
+ // Compare two-byte strings case insensitively.
// Called from generated RegExp code.
- static int CaseInsensitiveCompareUC16(Address byte_offset1,
- Address byte_offset2,
- size_t byte_length, Isolate* isolate);
+ static int CaseInsensitiveCompareNonUnicode(Address byte_offset1,
+ Address byte_offset2,
+ size_t byte_length,
+ Isolate* isolate);
+ static int CaseInsensitiveCompareUnicode(Address byte_offset1,
+ Address byte_offset2,
+ size_t byte_length,
+ Isolate* isolate);
// Check that we are not in the middle of a surrogate pair.
void CheckNotInSurrogatePair(int cp_offset, Label* on_failure);
diff --git a/chromium/v8/src/regexp/regexp-parser.cc b/chromium/v8/src/regexp/regexp-parser.cc
index 3c1115414fb..7b87044ca65 100644
--- a/chromium/v8/src/regexp/regexp-parser.cc
+++ b/chromium/v8/src/regexp/regexp-parser.cc
@@ -1301,7 +1301,7 @@ bool LookupSpecialPropertyValueName(const char* name,
return true;
}
-// Explicitly whitelist supported binary properties. The spec forbids supporting
+// Explicitly allowlist supported binary properties. The spec forbids supporting
// properties outside of this set to ensure interoperability.
bool IsSupportedBinaryProperty(UProperty property) {
switch (property) {
@@ -1550,7 +1550,7 @@ bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
}
while (d >= 0) {
x = x * 16 + d;
- if (x > max_value) {
+ if (x > static_cast<uc32>(max_value)) {
return false;
}
Advance();
@@ -1789,34 +1789,54 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
#undef CHECK_FAILED
-
-bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
- FlatStringReader* input, JSRegExp::Flags flags,
- RegExpCompileData* result) {
+bool RegExpParser::Parse(RegExpCompileData* result,
+ const DisallowHeapAllocation&) {
DCHECK(result != nullptr);
- RegExpParser parser(input, flags, isolate, zone);
- RegExpTree* tree = parser.ParsePattern();
- if (parser.failed()) {
+ RegExpTree* tree = ParsePattern();
+ if (failed()) {
DCHECK(tree == nullptr);
- DCHECK(parser.error_ != RegExpError::kNone);
- result->error = parser.error_;
- result->error_pos = parser.error_pos_;
+ DCHECK(error_ != RegExpError::kNone);
+ result->error = error_;
+ result->error_pos = error_pos_;
} else {
DCHECK(tree != nullptr);
- DCHECK(parser.error_ == RegExpError::kNone);
+ DCHECK(error_ == RegExpError::kNone);
if (FLAG_trace_regexp_parser) {
StdoutStream os;
- tree->Print(os, zone);
+ tree->Print(os, zone());
os << "\n";
}
result->tree = tree;
- int capture_count = parser.captures_started();
- result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
- result->contains_anchor = parser.contains_anchor();
- result->capture_name_map = parser.CreateCaptureNameMap();
+ int capture_count = captures_started();
+ result->simple = tree->IsAtom() && simple() && capture_count == 0;
+ result->contains_anchor = contains_anchor();
result->capture_count = capture_count;
}
- return !parser.failed();
+ return !failed();
+}
+
+bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
+ FlatStringReader* input, JSRegExp::Flags flags,
+ RegExpCompileData* result) {
+ RegExpParser parser(input, flags, isolate, zone);
+ bool success;
+ {
+ DisallowHeapAllocation no_gc;
+ success = parser.Parse(result, no_gc);
+ }
+ if (success) {
+ result->capture_name_map = parser.CreateCaptureNameMap();
+ }
+ return success;
+}
+
+bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
+ FlatStringReader* input,
+ JSRegExp::Flags flags,
+ RegExpCompileData* result,
+ const DisallowHeapAllocation& no_gc) {
+ RegExpParser parser(input, flags, isolate, zone);
+ return parser.Parse(result, no_gc);
}
RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
diff --git a/chromium/v8/src/regexp/regexp-parser.h b/chromium/v8/src/regexp/regexp-parser.h
index aff1746bc53..bfb08208980 100644
--- a/chromium/v8/src/regexp/regexp-parser.h
+++ b/chromium/v8/src/regexp/regexp-parser.h
@@ -159,6 +159,13 @@ class V8_EXPORT_PRIVATE RegExpParser {
static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
JSRegExp::Flags flags, RegExpCompileData* result);
+ static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
+ FlatStringReader* input, JSRegExp::Flags flags,
+ RegExpCompileData* result,
+ const DisallowHeapAllocation& no_gc);
+
+ private:
+ bool Parse(RegExpCompileData* result, const DisallowHeapAllocation&);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
diff --git a/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
index 2109b45314a..9ac4f755227 100644
--- a/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
+++ b/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.cc
@@ -230,7 +230,7 @@ void RegExpMacroAssemblerS390::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerS390::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
__ LoadP(r2, register_location(start_reg)); // Index of start of
// capture
@@ -346,7 +346,10 @@ void RegExpMacroAssemblerS390::CheckNotBackReferenceIgnoreCase(
{
AllowExternalCallThatCantCauseGC scope(masm_);
ExternalReference function =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(function, argument_count);
}
diff --git a/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.h b/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.h
index 9ced67fe274..e4f88f51b9a 100644
--- a/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.h
+++ b/chromium/v8/src/regexp/s390/regexp-macro-assembler-s390.h
@@ -36,7 +36,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
+ bool read_backward, bool unicode,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
diff --git a/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
index cf8eb6604c9..ef3e48428f0 100644
--- a/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -215,7 +215,7 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
}
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
+ int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
@@ -354,7 +354,10 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// linter.
AllowExternalCallThatCantCauseGC scope(&masm_);
ExternalReference compare =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
+ unicode ? ExternalReference::re_case_insensitive_compare_unicode(
+ isolate())
+ : ExternalReference::re_case_insensitive_compare_non_unicode(
+ isolate());
__ CallCFunction(compare, num_arguments);
}
diff --git a/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.h
index 551e9bc6ec7..ea4d45edba8 100644
--- a/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/chromium/v8/src/regexp/x64/regexp-macro-assembler-x64.h
@@ -37,6 +37,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ bool unicode,
Label* on_no_match) override;
void CheckNotCharacter(uint32_t c, Label* on_not_equal) override;
void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,