diff options
Diffstat (limited to 'deps/v8/src/regexp')
39 files changed, 741 insertions, 284 deletions
diff --git a/deps/v8/src/regexp/DIR_METADATA b/deps/v8/src/regexp/DIR_METADATA new file mode 100644 index 0000000000..b183b81885 --- /dev/null +++ b/deps/v8/src/regexp/DIR_METADATA @@ -0,0 +1,11 @@ +# Metadata information for this directory. +# +# For more information on DIR_METADATA files, see: +# https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/README.md +# +# For the schema of this file, see Metadata message: +# https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/proto/dir_metadata.proto + +monorail { + component: "Blink>JavaScript>Runtime" +}
\ No newline at end of file diff --git a/deps/v8/src/regexp/OWNERS b/deps/v8/src/regexp/OWNERS index 250c8c6b88..3322bb9505 100644 --- a/deps/v8/src/regexp/OWNERS +++ b/deps/v8/src/regexp/OWNERS @@ -1,4 +1,2 @@ jgruber@chromium.org yangguo@chromium.org - -# COMPONENT: Blink>JavaScript>Runtime diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc index 78b586e265..48e8fae663 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc @@ -127,6 +127,7 @@ RegExpMacroAssemblerARM::~RegExpMacroAssemblerARM() { exit_label_.Unuse(); check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); + fallback_label_.Unuse(); } @@ -164,8 +165,13 @@ void RegExpMacroAssemblerARM::Backtrack() { __ cmp(r0, Operand(backtrack_limit())); __ b(ne, &next); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ jmp(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -901,6 +907,12 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { __ jmp(&return_r0); } + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ mov(r0, Operand(FALLBACK_TO_EXPERIMENTAL)); + __ jmp(&return_r0); + } + CodeDesc code_desc; masm_->GetCode(isolate(), &code_desc); Handle<Code> code = @@ -1072,7 +1084,6 @@ void RegExpMacroAssemblerARM::CallCheckStackGuardState() { __ mov(ip, Operand(stack_guard_check)); EmbeddedData d = EmbeddedData::FromBlob(); - CHECK(Builtins::IsIsolateIndependent(Builtins::kDirectCEntry)); Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry); __ mov(lr, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); __ Call(lr); diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h index 910e5c4607..92cac644e5 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h @@ -203,6 +203,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM Label exit_label_; Label check_preempt_label_; Label stack_overflow_label_; + Label fallback_label_; }; } // namespace internal diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc index ac33f8631f..32fed3703b 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc @@ -142,6 +142,7 @@ RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() { exit_label_.Unuse(); check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); + fallback_label_.Unuse(); } int RegExpMacroAssemblerARM64::stack_limit_slack() { @@ -201,8 +202,13 @@ void RegExpMacroAssemblerARM64::Backtrack() { __ Cmp(scratch, Operand(backtrack_limit())); __ B(ne, &next); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ B(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -1094,6 +1100,12 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { __ B(&return_w0); } + if (fallback_label_.is_linked()) { + __ Bind(&fallback_label_); + __ Mov(w0, FALLBACK_TO_EXPERIMENTAL); + __ B(&return_w0); + } + CodeDesc code_desc; masm_->GetCode(isolate(), &code_desc); Handle<Code> code = @@ -1399,7 +1411,6 @@ void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) { Register scratch = temps.AcquireX(); EmbeddedData d = EmbeddedData::FromBlob(); - CHECK(Builtins::IsIsolateIndependent(Builtins::kDirectCEntry)); Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry); __ Ldr(scratch, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h index aeb49aa9ff..6d60271a43 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h @@ -279,6 +279,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64 Label exit_label_; Label check_preempt_label_; Label stack_overflow_label_; + Label fallback_label_; }; } // namespace internal diff --git a/deps/v8/src/regexp/experimental/experimental-bytecode.h b/deps/v8/src/regexp/experimental/experimental-bytecode.h index 3cb65828c5..4e9bc9396b 100644 --- a/deps/v8/src/regexp/experimental/experimental-bytecode.h +++ b/deps/v8/src/regexp/experimental/experimental-bytecode.h @@ -106,21 +106,21 @@ struct RegExpInstruction { uc16 max; // Inclusive. }; - static RegExpInstruction ConsumeRange(Uc16Range consume_range) { + static RegExpInstruction ConsumeRange(uc16 min, uc16 max) { RegExpInstruction result; result.opcode = CONSUME_RANGE; - result.payload.consume_range = consume_range; + result.payload.consume_range = Uc16Range{min, max}; return result; } static RegExpInstruction ConsumeAnyChar() { - return ConsumeRange(Uc16Range{0x0000, 0xFFFF}); + return ConsumeRange(0x0000, 0xFFFF); } static RegExpInstruction Fail() { // This is encoded as the empty CONSUME_RANGE of characters 0xFFFF <= c <= // 0x0000. - return ConsumeRange(Uc16Range{0xFFFF, 0x0000}); + return ConsumeRange(0xFFFF, 0x0000); } static RegExpInstruction Fork(int32_t alt_index) { diff --git a/deps/v8/src/regexp/experimental/experimental-compiler.cc b/deps/v8/src/regexp/experimental/experimental-compiler.cc index 615f7566f4..4d53c2c0c5 100644 --- a/deps/v8/src/regexp/experimental/experimental-compiler.cc +++ b/deps/v8/src/regexp/experimental/experimental-compiler.cc @@ -35,7 +35,7 @@ class CanBeHandledVisitor final : private RegExpVisitor { // future. static constexpr JSRegExp::Flags kAllowedFlags = JSRegExp::kGlobal | JSRegExp::kSticky | JSRegExp::kMultiline | - JSRegExp::kDotAll; + JSRegExp::kDotAll | JSRegExp::kLinear; // We support Unicode iff kUnicode is among the supported flags. STATIC_ASSERT(ExperimentalRegExp::kSupportsUnicode == ((kAllowedFlags & JSRegExp::kUnicode) != 0)); @@ -177,94 +177,120 @@ class CanBeHandledVisitor final : private RegExpVisitor { bool ExperimentalRegExpCompiler::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags, int capture_count) { - DCHECK(FLAG_enable_experimental_regexp_engine); return CanBeHandledVisitor::Check(tree, flags, capture_count); } namespace { -// A label in bytecode with known address. -class Label { +// A label in bytecode which starts with no known address. The address *must* +// be bound with `Bind` before the label goes out of scope. +// Implemented as a linked list through the `payload.pc` of FORK and JMP +// instructions. +struct Label { public: - explicit Label(int index) : index_(index) { DCHECK_GE(index_, 0); } - - int index() { return index_; } - - // Friend functions because `label.AddForkTo(code, zone)` reads like we're - // adding code to where `label` is defined, but we're adding a fork with - // target `label` at the end of `code`. - friend void AddForkTo(Label target, ZoneList<RegExpInstruction>& code, - Zone* zone) { - code.Add(RegExpInstruction::Fork(target.index_), zone); + Label() = default; + ~Label() { + DCHECK_EQ(state_, BOUND); + DCHECK_GE(bound_index_, 0); } - friend void AddJmpTo(Label target, ZoneList<RegExpInstruction>& code, - Zone* zone) { - code.Add(RegExpInstruction::Jmp(target.index_), zone); - } + // Don't copy, don't move. Moving could be implemented, but it's not + // needed anywhere. + Label(const Label&) = delete; + Label& operator=(const Label&) = delete; private: - int index_; + friend class BytecodeAssembler; + + // UNBOUND implies unbound_patch_list_begin_. + // BOUND implies bound_index_. + enum { UNBOUND, BOUND } state_ = UNBOUND; + union { + int unbound_patch_list_begin_ = -1; + int bound_index_; + }; }; -// A label in bytecode whose address is not known yet. The address *must* be -// `Bind` before the deferred label object goes out of scope, and the deferred -// label object *must not* be used after it was defined. (Use the `Label` -// object returned by `Bind` instead.) -struct DeferredLabel { - // Implemented as a linked list through the `payload.pc` of FORK and JMP - // instructions. +class BytecodeAssembler { public: - DeferredLabel() = default; - ~DeferredLabel() { DCHECK_EQ(patch_list_begin_, kLabelWasDefined); } + // TODO(mbid,v8:10765): Use some upper bound for code_ capacity computed from + // the `tree` size we're going to compile? + explicit BytecodeAssembler(Zone* zone) : zone_(zone), code_(0, zone) {} + + ZoneList<RegExpInstruction> IntoCode() && { return std::move(code_); } + + void Accept() { code_.Add(RegExpInstruction::Accept(), zone_); } + + void Assertion(RegExpAssertion::AssertionType t) { + code_.Add(RegExpInstruction::Assertion(t), zone_); + } + + void ClearRegister(int32_t register_index) { + code_.Add(RegExpInstruction::ClearRegister(register_index), zone_); + } + + void ConsumeRange(uc16 from, uc16 to) { + code_.Add(RegExpInstruction::ConsumeRange(from, to), zone_); + } + + void ConsumeAnyChar() { + code_.Add(RegExpInstruction::ConsumeAnyChar(), zone_); + } + + void Fork(Label& target) { + LabelledInstrImpl(RegExpInstruction::Opcode::FORK, target); + } - friend void AddForkTo(DeferredLabel& target, - ZoneList<RegExpInstruction>& code, Zone* zone) { - DCHECK_NE(target.patch_list_begin_, DeferredLabel::kLabelWasDefined); - int new_list_begin = code.length(); - DCHECK_GE(new_list_begin, 0); - code.Add(RegExpInstruction::Fork(target.patch_list_begin_), zone); - target.patch_list_begin_ = new_list_begin; + void Jmp(Label& target) { + LabelledInstrImpl(RegExpInstruction::Opcode::JMP, target); } - friend void AddJmpTo(DeferredLabel& target, ZoneList<RegExpInstruction>& code, - Zone* zone) { - DCHECK_NE(target.patch_list_begin_, DeferredLabel::kLabelWasDefined); - int new_list_begin = code.length(); - DCHECK_GE(new_list_begin, 0); - code.Add(RegExpInstruction::Jmp(target.patch_list_begin_), zone); - target.patch_list_begin_ = new_list_begin; + void SetRegisterToCp(int32_t register_index) { + code_.Add(RegExpInstruction::SetRegisterToCp(register_index), zone_); } - // Define the deferred label as referring to the next instruction that will - // be pushed to `code`. Consumes the DeferredLabel object and returns a - // Label object. - Label Bind(ZoneList<RegExpInstruction>& code) && { - DCHECK_NE(patch_list_begin_, kLabelWasDefined); + void Bind(Label& target) { + DCHECK_EQ(target.state_, Label::UNBOUND); - int index = code.length(); + int index = code_.length(); - while (patch_list_begin_ != kEmptyList) { - RegExpInstruction& inst = code[patch_list_begin_]; + while (target.unbound_patch_list_begin_ != -1) { + RegExpInstruction& inst = code_[target.unbound_patch_list_begin_]; DCHECK(inst.opcode == RegExpInstruction::FORK || inst.opcode == RegExpInstruction::JMP); - patch_list_begin_ = inst.payload.pc; + target.unbound_patch_list_begin_ = inst.payload.pc; inst.payload.pc = index; } - patch_list_begin_ = kLabelWasDefined; - return Label(index); + target.state_ = Label::BOUND; + target.bound_index_ = index; } + void Fail() { code_.Add(RegExpInstruction::Fail(), zone_); } + private: - static constexpr int kEmptyList = -1; - static constexpr int kLabelWasDefined = -2; - int patch_list_begin_ = kEmptyList; + void LabelledInstrImpl(RegExpInstruction::Opcode op, Label& target) { + RegExpInstruction result; + result.opcode = op; - // Don't copy, don't move. Moving could be implemented, but it's not - // needed anywhere. - DISALLOW_COPY_AND_ASSIGN(DeferredLabel); + if (target.state_ == Label::BOUND) { + result.payload.pc = target.bound_index_; + } else { + DCHECK_EQ(target.state_, Label::UNBOUND); + int new_list_begin = code_.length(); + DCHECK_GE(new_list_begin, 0); + + result.payload.pc = target.unbound_patch_list_begin_; + + target.unbound_patch_list_begin_ = new_list_begin; + } + + code_.Add(result, zone_); + } + + Zone* zone_; + ZoneList<RegExpInstruction> code_; }; class CompileVisitor : private RegExpVisitor { @@ -278,27 +304,24 @@ class CompileVisitor : private RegExpVisitor { // The match is not anchored, i.e. may start at any input position, so we // emit a preamble corresponding to /.*?/. This skips an arbitrary // prefix in the input non-greedily. - compiler.CompileNonGreedyStar([&]() { - compiler.code_.Add(RegExpInstruction::ConsumeAnyChar(), zone); - }); + compiler.CompileNonGreedyStar( + [&]() { compiler.assembler_.ConsumeAnyChar(); }); } - compiler.code_.Add(RegExpInstruction::SetRegisterToCp(0), zone); + compiler.assembler_.SetRegisterToCp(0); tree->Accept(&compiler, nullptr); - compiler.code_.Add(RegExpInstruction::SetRegisterToCp(1), zone); - compiler.code_.Add(RegExpInstruction::Accept(), zone); + compiler.assembler_.SetRegisterToCp(1); + compiler.assembler_.Accept(); - return std::move(compiler.code_); + return std::move(compiler.assembler_).IntoCode(); } private: - // TODO(mbid,v8:10765): Use some upper bound for code_ capacity computed from - // the `tree` size we're going to compile? - explicit CompileVisitor(Zone* zone) : zone_(zone), code_(0, zone) {} + explicit CompileVisitor(Zone* zone) : zone_(zone), assembler_(zone) {} // Generate a disjunction of code fragments compiled by a function `alt_gen`. // `alt_gen` is called repeatedly with argument `int i = 0, 1, ..., alt_num - - // 1` and should push code corresponding to the ith alternative onto `code_`. + // 1` and should build code corresponding to the ith alternative. template <class F> void CompileDisjunction(int alt_num, F&& gen_alt) { // An alternative a1 | ... | an is compiled into @@ -325,23 +348,23 @@ class CompileVisitor : private RegExpVisitor { if (alt_num == 0) { // The empty disjunction. This can never match. - code_.Add(RegExpInstruction::Fail(), zone_); + assembler_.Fail(); return; } - DeferredLabel end; + Label end; for (int i = 0; i != alt_num - 1; ++i) { - DeferredLabel tail; - AddForkTo(tail, code_, zone_); + Label tail; + assembler_.Fork(tail); gen_alt(i); - AddJmpTo(end, code_, zone_); - std::move(tail).Bind(code_); + assembler_.Jmp(end); + assembler_.Bind(tail); } gen_alt(alt_num - 1); - std::move(end).Bind(code_); + assembler_.Bind(end); } void* VisitDisjunction(RegExpDisjunction* node, void*) override { @@ -359,7 +382,7 @@ class CompileVisitor : private RegExpVisitor { } void* VisitAssertion(RegExpAssertion* node, void*) override { - code_.Add(RegExpInstruction::Assertion(node->assertion_type()), zone_); + assembler_.Assertion(node->assertion_type()); return nullptr; } @@ -390,17 +413,14 @@ class CompileVisitor : private RegExpVisitor { DCHECK_IMPLIES(to > kMaxSupportedCodepoint, to == String::kMaxCodePoint); uc16 to_uc16 = static_cast<uc16>(std::min(to, kMaxSupportedCodepoint)); - RegExpInstruction::Uc16Range range{from_uc16, to_uc16}; - code_.Add(RegExpInstruction::ConsumeRange(range), zone_); + assembler_.ConsumeRange(from_uc16, to_uc16); }); return nullptr; } void* VisitAtom(RegExpAtom* node, void*) override { for (uc16 c : node->data()) { - code_.Add( - RegExpInstruction::ConsumeRange(RegExpInstruction::Uc16Range{c, c}), - zone_); + assembler_.ConsumeRange(c, c); } return nullptr; } @@ -413,7 +433,7 @@ class CompileVisitor : private RegExpVisitor { // It suffices to clear the register containing the `begin` of a capture // because this indicates that the capture is undefined, regardless of // the value in the `end` register. - code_.Add(RegExpInstruction::ClearRegister(i), zone_); + assembler_.ClearRegister(i); } } @@ -431,14 +451,15 @@ class CompileVisitor : private RegExpVisitor { // // This is greedy because a forked thread has lower priority than the // thread that spawned it. - Label begin(code_.length()); - DeferredLabel end; + Label begin; + Label end; - AddForkTo(end, code_, zone_); + assembler_.Bind(begin); + assembler_.Fork(end); emit_body(); - AddJmpTo(begin, code_, zone_); + assembler_.Jmp(begin); - std::move(end).Bind(code_); + assembler_.Bind(end); } // Emit bytecode corresponding to /<emit_body>*?/. @@ -454,18 +475,17 @@ class CompileVisitor : private RegExpVisitor { // end: // ... - Label body(code_.length() + 2); - DeferredLabel end; - - AddForkTo(body, code_, zone_); - AddJmpTo(end, code_, zone_); + Label body; + Label end; - DCHECK_EQ(body.index(), code_.length()); + assembler_.Fork(body); + assembler_.Jmp(end); + assembler_.Bind(body); emit_body(); - AddForkTo(body, code_, zone_); + assembler_.Fork(body); - std::move(end).Bind(code_); + assembler_.Bind(end); } // Emit bytecode corresponding to /<emit_body>{0, max_repetition_num}/. @@ -484,12 +504,12 @@ class CompileVisitor : private RegExpVisitor { // end: // ... - DeferredLabel end; + Label end; for (int i = 0; i != max_repetition_num; ++i) { - AddForkTo(end, code_, zone_); + assembler_.Fork(end); emit_body(); } - std::move(end).Bind(code_); + assembler_.Bind(end); } // Emit bytecode corresponding to /<emit_body>{0, max_repetition_num}?/. @@ -512,17 +532,16 @@ class CompileVisitor : private RegExpVisitor { // end: // ... - DeferredLabel end; + Label end; for (int i = 0; i != max_repetition_num; ++i) { - Label body(code_.length() + 2); - AddForkTo(body, code_, zone_); - AddJmpTo(end, code_, zone_); - - DCHECK_EQ(body.index(), code_.length()); + Label body; + assembler_.Fork(body); + assembler_.Jmp(end); + assembler_.Bind(body); emit_body(); } - std::move(end).Bind(code_); + assembler_.Bind(end); } void* VisitQuantifier(RegExpQuantifier* node, void*) override { @@ -571,9 +590,9 @@ class CompileVisitor : private RegExpVisitor { int index = node->index(); int start_register = RegExpCapture::StartRegister(index); int end_register = RegExpCapture::EndRegister(index); - code_.Add(RegExpInstruction::SetRegisterToCp(start_register), zone_); + assembler_.SetRegisterToCp(start_register); node->body()->Accept(this, nullptr); - code_.Add(RegExpInstruction::SetRegisterToCp(end_register), zone_); + assembler_.SetRegisterToCp(end_register); return nullptr; } @@ -602,7 +621,7 @@ class CompileVisitor : private RegExpVisitor { private: Zone* zone_; - ZoneList<RegExpInstruction> code_; + BytecodeAssembler assembler_; }; } // namespace diff --git a/deps/v8/src/regexp/experimental/experimental-interpreter.cc b/deps/v8/src/regexp/experimental/experimental-interpreter.cc index 8db93ca746..fffca782fe 100644 --- a/deps/v8/src/regexp/experimental/experimental-interpreter.cc +++ b/deps/v8/src/regexp/experimental/experimental-interpreter.cc @@ -5,6 +5,8 @@ #include "src/regexp/experimental/experimental-interpreter.h" #include "src/base/optional.h" +#include "src/objects/fixed-array-inl.h" +#include "src/objects/string-inl.h" #include "src/regexp/experimental/experimental.h" #include "src/strings/char-predicates-inl.h" #include "src/zone/zone-allocator.h" @@ -50,6 +52,37 @@ bool SatisfiesAssertion(RegExpAssertion::AssertionType type, } } +Vector<RegExpInstruction> ToInstructionVector( + ByteArray raw_bytes, const DisallowHeapAllocation& no_gc) { + RegExpInstruction* inst_begin = + reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress()); + int inst_num = raw_bytes.length() / sizeof(RegExpInstruction); + DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length()); + return Vector<RegExpInstruction>(inst_begin, inst_num); +} + +template <class Character> +Vector<const Character> ToCharacterVector(String str, + const DisallowHeapAllocation& no_gc); + +template <> +Vector<const uint8_t> ToCharacterVector<uint8_t>( + String str, const DisallowHeapAllocation& no_gc) { + DCHECK(str.IsFlat()); + String::FlatContent content = str.GetFlatContent(no_gc); + DCHECK(content.IsOneByte()); + return content.ToOneByteVector(); +} + +template <> +Vector<const uc16> ToCharacterVector<uc16>( + String str, const DisallowHeapAllocation& no_gc) { + DCHECK(str.IsFlat()); + String::FlatContent content = str.GetFlatContent(no_gc); + DCHECK(content.IsTwoByte()); + return content.ToUC16Vector(); +} + template <class Character> class NfaInterpreter { // Executes a bytecode program in breadth-first mode, without backtracking. @@ -100,12 +133,16 @@ class NfaInterpreter { // with high priority are left, we return the match that was produced by the // ACCEPTing thread with highest priority. public: - NfaInterpreter(Vector<const RegExpInstruction> bytecode, - int register_count_per_match, Vector<const Character> input, + NfaInterpreter(Isolate* isolate, RegExp::CallOrigin call_origin, + ByteArray bytecode, int register_count_per_match, String input, int32_t input_index, Zone* zone) - : bytecode_(bytecode), + : isolate_(isolate), + call_origin_(call_origin), + bytecode_object_(bytecode), + bytecode_(ToInstructionVector(bytecode, no_gc_)), register_count_per_match_(register_count_per_match), - input_(input), + input_object_(input), + input_(ToCharacterVector<Character>(input, no_gc_)), input_index_(input_index), pc_last_input_index_(zone->NewArray<int>(bytecode.length()), bytecode.length()), @@ -131,12 +168,15 @@ class NfaInterpreter { int match_num = 0; while (match_num != max_match_num) { - FindNextMatch(); + int err_code = FindNextMatch(); + if (err_code != RegExp::kInternalRegExpSuccess) return err_code; + if (!FoundMatch()) break; - Vector<int> registers = *best_match_registers_; + Vector<int> registers = *best_match_registers_; output_registers = std::copy(registers.begin(), registers.end(), output_registers); + ++match_num; const int match_begin = registers[0]; @@ -177,6 +217,69 @@ class NfaInterpreter { int* register_array_begin; }; + // Handles pending interrupts if there are any. Returns + // RegExp::kInternalRegExpSuccess if execution can continue, and an error + // code otherwise. + int HandleInterrupts() { + StackLimitCheck check(isolate_); + if (call_origin_ == RegExp::CallOrigin::kFromJs) { + // Direct calls from JavaScript can be interrupted in two ways: + // 1. A real stack overflow, in which case we let the caller throw the + // exception. + // 2. The stack guard was used to interrupt execution for another purpose, + // forcing the call through the runtime system. + if (check.JsHasOverflowed()) { + return RegExp::kInternalRegExpException; + } else if (check.InterruptRequested()) { + return RegExp::kInternalRegExpRetry; + } + } else { + DCHECK(call_origin_ == RegExp::CallOrigin::kFromRuntime); + HandleScope handles(isolate_); + Handle<ByteArray> bytecode_handle(bytecode_object_, isolate_); + Handle<String> input_handle(input_object_, isolate_); + + if (check.JsHasOverflowed()) { + // We abort the interpreter now anyway, so gc can't invalidate any + // pointers. + AllowHeapAllocation yes_gc; + isolate_->StackOverflow(); + return RegExp::kInternalRegExpException; + } else if (check.InterruptRequested()) { + // TODO(mbid): Is this really equivalent to whether the string is + // one-byte or two-byte? A comment at the declaration of + // IsOneByteRepresentationUnderneath says that this might fail for + // external strings. + const bool was_one_byte = + String::IsOneByteRepresentationUnderneath(input_object_); + + Object result; + { + AllowHeapAllocation yes_gc; + result = isolate_->stack_guard()->HandleInterrupts(); + } + if (result.IsException(isolate_)) { + return RegExp::kInternalRegExpException; + } + + // If we changed between a LATIN1 and a UC16 string, we need to restart + // regexp matching with the appropriate template instantiation of + // RawMatch. + if (String::IsOneByteRepresentationUnderneath(*input_handle) != + was_one_byte) { + return RegExp::kInternalRegExpRetry; + } + + // Update objects and pointers in case they have changed during gc. + bytecode_object_ = *bytecode_handle; + bytecode_ = ToInstructionVector(bytecode_object_, no_gc_); + input_object_ = *input_handle; + input_ = ToCharacterVector<Character>(input_object_, no_gc_); + } + } + return RegExp::kInternalRegExpSuccess; + } + // Change the current input index for future calls to `FindNextMatch`. void SetInputIndex(int new_input_index) { DCHECK_GE(input_index_, 0); @@ -187,8 +290,10 @@ class NfaInterpreter { // Find the next match and return the corresponding capture registers and // write its capture registers to `best_match_registers_`. The search starts - // at the current `input_index_`. - void FindNextMatch() { + // at the current `input_index_`. Returns RegExp::kInternalRegExpSuccess if + // execution could finish regularly (with or without a match) and an error + // code due to interrupt otherwise. + int FindNextMatch() { DCHECK(active_threads_.is_empty()); // TODO(mbid,v8:10765): Can we get around resetting `pc_last_input_index_` // here? As long as @@ -240,12 +345,20 @@ class NfaInterpreter { uc16 input_char = input_[input_index_]; ++input_index_; + static constexpr int kTicksBetweenInterruptHandling = 64; + if (input_index_ % kTicksBetweenInterruptHandling == 0) { + int err_code = HandleInterrupts(); + if (err_code != RegExp::kInternalRegExpSuccess) return err_code; + } + // We unblock all blocked_threads_ by feeding them the input char. FlushBlockedThreads(input_char); // Run all threads until they block or accept. RunActiveThreads(); } + + return RegExp::kInternalRegExpSuccess; } // Run an active thread `t` until it executes a CONSUME_RANGE or ACCEPT @@ -394,12 +507,20 @@ class NfaInterpreter { pc_last_input_index_[pc] = input_index_; } - const Vector<const RegExpInstruction> bytecode_; + Isolate* const isolate_; + + const RegExp::CallOrigin call_origin_; + + const DisallowHeapAllocation no_gc_; + + ByteArray bytecode_object_; + Vector<const RegExpInstruction> bytecode_; // Number of registers used per thread. const int register_count_per_match_; - const Vector<const Character> input_; + String input_object_; + Vector<const Character> input_; int input_index_; // pc_last_input_index_[k] records the value of input_index_ the last @@ -432,22 +553,25 @@ class NfaInterpreter { } // namespace -int ExperimentalRegExpInterpreter::FindMatchesNfaOneByte( - Vector<const RegExpInstruction> bytecode, int register_count_per_match, - Vector<const uint8_t> input, int start_index, int32_t* output_registers, - int output_register_count, Zone* zone) { - NfaInterpreter<uint8_t> interpreter(bytecode, register_count_per_match, input, - start_index, zone); - return interpreter.FindMatches(output_registers, output_register_count); -} - -int ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte( - Vector<const RegExpInstruction> bytecode, int register_count_per_match, - Vector<const uc16> input, int start_index, int32_t* output_registers, - int output_register_count, Zone* zone) { - NfaInterpreter<uc16> interpreter(bytecode, register_count_per_match, input, - start_index, zone); - return interpreter.FindMatches(output_registers, output_register_count); +int ExperimentalRegExpInterpreter::FindMatches( + Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray bytecode, + int register_count_per_match, String input, int start_index, + int32_t* output_registers, int output_register_count, Zone* zone) { + DCHECK(input.IsFlat()); + DisallowHeapAllocation no_gc; + + if (input.GetFlatContent(no_gc).IsOneByte()) { + NfaInterpreter<uint8_t> interpreter(isolate, call_origin, bytecode, + register_count_per_match, input, + start_index, zone); + return interpreter.FindMatches(output_registers, output_register_count); + } else { + DCHECK(input.GetFlatContent(no_gc).IsTwoByte()); + NfaInterpreter<uc16> interpreter(isolate, call_origin, bytecode, + register_count_per_match, input, + start_index, zone); + return interpreter.FindMatches(output_registers, output_register_count); + } } } // namespace internal diff --git a/deps/v8/src/regexp/experimental/experimental-interpreter.h b/deps/v8/src/regexp/experimental/experimental-interpreter.h index 32bff001b1..3da50e3902 100644 --- a/deps/v8/src/regexp/experimental/experimental-interpreter.h +++ b/deps/v8/src/regexp/experimental/experimental-interpreter.h @@ -5,7 +5,10 @@ #ifndef V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_ #define V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_ +#include "src/objects/fixed-array.h" +#include "src/objects/string.h" #include "src/regexp/experimental/experimental-bytecode.h" +#include "src/regexp/regexp.h" #include "src/utils/vector.h" namespace v8 { @@ -18,18 +21,13 @@ class ExperimentalRegExpInterpreter final : public AllStatic { // Executes a bytecode program in breadth-first NFA mode, without // backtracking, to find matching substrings. Trys to find up to // `max_match_num` matches in `input`, starting at `start_index`. Returns - // the actual number of matches found. The boundaires of matching subranges + // the actual number of matches found. The boundaries of matching subranges // are written to `matches_out`. Provided in variants for one-byte and // two-byte strings. - static int FindMatchesNfaOneByte(Vector<const RegExpInstruction> bytecode, - int capture_count, - Vector<const uint8_t> input, int start_index, - int32_t* output_registers, - int output_register_count, Zone* zone); - static int FindMatchesNfaTwoByte(Vector<const RegExpInstruction> bytecode, - int capture_count, Vector<const uc16> input, - int start_index, int32_t* output_registers, - int output_register_count, Zone* zone); + static int FindMatches(Isolate* isolate, RegExp::CallOrigin call_origin, + ByteArray bytecode, int capture_count, String input, + int start_index, int32_t* output_registers, + int output_register_count, Zone* zone); }; } // namespace internal diff --git a/deps/v8/src/regexp/experimental/experimental.cc b/deps/v8/src/regexp/experimental/experimental.cc index dc919f56c2..56c0596bb4 100644 --- a/deps/v8/src/regexp/experimental/experimental.cc +++ b/deps/v8/src/regexp/experimental/experimental.cc @@ -15,6 +15,8 @@ namespace internal { bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags, int capture_count) { + DCHECK(FLAG_enable_experimental_regexp_engine || + FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count); } @@ -33,7 +35,6 @@ void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re, bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) { DCHECK(FLAG_enable_experimental_regexp_engine); - DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP re->JSRegExpVerify(isolate); @@ -43,22 +44,34 @@ bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) { Smi::FromInt(JSRegExp::kUninitializedValue); } -bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { - DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL); -#ifdef VERIFY_HEAP - re->JSRegExpVerify(isolate); -#endif +template <class T> +Handle<ByteArray> VectorToByteArray(Isolate* isolate, Vector<T> data) { + STATIC_ASSERT(std::is_trivial<T>::value); - Handle<String> source(re->Pattern(), isolate); - if (FLAG_trace_experimental_regexp_engine) { - StdoutStream{} << "Compiling experimental regexp " << *source << std::endl; - } + int byte_length = sizeof(T) * data.length(); + Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length); + DisallowHeapAllocation no_gc; + MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length); + return byte_array; +} +namespace { + +struct CompilationResult { + Handle<ByteArray> bytecode; + Handle<FixedArray> capture_name_map; +}; + +// Compiles source pattern, but doesn't change the regexp object. +base::Optional<CompilationResult> CompileImpl(Isolate* isolate, + Handle<JSRegExp> regexp) { Zone zone(isolate->allocator(), ZONE_NAME); + Handle<String> source(regexp->Pattern(), isolate); + JSRegExp::Flags flags = regexp->GetFlags(); + // Parse and compile the regexp source. RegExpCompileData parse_result; - JSRegExp::Flags flags = re->GetFlags(); FlatStringReader reader(isolate, source); DCHECK(!isolate->has_pending_exception()); @@ -67,28 +80,52 @@ bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { if (!parse_success) { // The pattern was already parsed successfully during initialization, so // the only way parsing can fail now is because of stack overflow. - CHECK_EQ(parse_result.error, RegExpError::kStackOverflow); - USE(RegExp::ThrowRegExpException(isolate, re, source, parse_result.error)); - return false; + DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow); + USE(RegExp::ThrowRegExpException(isolate, regexp, source, + parse_result.error)); + return base::nullopt; } ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone); - int byte_length = sizeof(RegExpInstruction) * bytecode.length(); - Handle<ByteArray> bytecode_byte_array = - isolate->factory()->NewByteArray(byte_length); - MemCopy(bytecode_byte_array->GetDataStartAddress(), bytecode.begin(), - byte_length); + CompilationResult result; + result.bytecode = VectorToByteArray(isolate, bytecode.ToVector()); + result.capture_name_map = parse_result.capture_name_map; + return result; +} + +} // namespace - re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, *bytecode_byte_array); - re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, *bytecode_byte_array); +bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { + DCHECK(FLAG_enable_experimental_regexp_engine); + DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL); +#ifdef VERIFY_HEAP + re->JSRegExpVerify(isolate); +#endif + + Handle<String> source(re->Pattern(), isolate); + if (FLAG_trace_experimental_regexp_engine) { + StdoutStream{} << "Compiling experimental regexp " << *source << std::endl; + } + + base::Optional<CompilationResult> compilation_result = + CompileImpl(isolate, re); + if (!compilation_result.has_value()) { + DCHECK(isolate->has_pending_exception()); + return false; + } + + re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, + *compilation_result->bytecode); + re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, + *compilation_result->bytecode); Handle<Code> trampoline = BUILTIN_CODE(isolate, RegExpExperimentalTrampoline); re->SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline); re->SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline); - re->SetCaptureNameMap(parse_result.capture_name_map); + re->SetCaptureNameMap(compilation_result->capture_name_map); return true; } @@ -101,45 +138,52 @@ Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) { return Vector<RegExpInstruction>(inst_begin, inst_num); } +namespace { + +int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin, + ByteArray bytecode, String subject, int capture_count, + int32_t* output_registers, int32_t output_register_count, + int32_t subject_index) { + DisallowHeapAllocation no_gc; + + int register_count_per_match = + JSRegExp::RegistersForCaptureCount(capture_count); + + int32_t result; + do { + DCHECK(subject.IsFlat()); + Zone zone(isolate->allocator(), ZONE_NAME); + result = ExperimentalRegExpInterpreter::FindMatches( + isolate, call_origin, bytecode, register_count_per_match, subject, + subject_index, output_registers, output_register_count, &zone); + } while (result == RegExp::kInternalRegExpRetry && + call_origin == RegExp::kFromRuntime); + return result; +} + +} // namespace + // Returns the number of matches. -int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, JSRegExp regexp, - String subject, int32_t* output_registers, +int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, + RegExp::CallOrigin call_origin, + JSRegExp regexp, String subject, + int32_t* output_registers, int32_t output_register_count, int32_t subject_index) { - DisallowHeapAllocation no_gc; - DCHECK(FLAG_enable_experimental_regexp_engine); + DisallowHeapAllocation no_gc; if (FLAG_trace_experimental_regexp_engine) { String source = String::cast(regexp.DataAt(JSRegExp::kSourceIndex)); StdoutStream{} << "Executing experimental regexp " << source << std::endl; } - Vector<RegExpInstruction> bytecode = AsInstructionSequence( - ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex))); - - if (FLAG_print_regexp_bytecode) { - StdoutStream{} << "Bytecode:" << std::endl; - StdoutStream{} << bytecode << std::endl; - } - - int register_count_per_match = - JSRegExp::RegistersForCaptureCount(regexp.CaptureCount()); - - DCHECK(subject.IsFlat()); - String::FlatContent subject_content = subject.GetFlatContent(no_gc); + ByteArray bytecode = + ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex)); - Zone zone(isolate->allocator(), ZONE_NAME); - - if (subject_content.IsOneByte()) { - return ExperimentalRegExpInterpreter::FindMatchesNfaOneByte( - bytecode, register_count_per_match, subject_content.ToOneByteVector(), - subject_index, output_registers, output_register_count, &zone); - } else { - return ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte( - bytecode, register_count_per_match, subject_content.ToUC16Vector(), - subject_index, output_registers, output_register_count, &zone); - } + return ExecRawImpl(isolate, call_origin, bytecode, subject, + regexp.CaptureCount(), output_registers, + output_register_count, subject_index); } int32_t ExperimentalRegExp::MatchForCallFromJs( @@ -148,7 +192,6 @@ int32_t ExperimentalRegExp::MatchForCallFromJs( Address backtrack_stack, RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) { DCHECK(FLAG_enable_experimental_regexp_engine); - DCHECK_NOT_NULL(isolate); DCHECK_NOT_NULL(output_registers); DCHECK(call_origin == RegExp::CallOrigin::kFromJs); @@ -162,15 +205,14 @@ int32_t ExperimentalRegExp::MatchForCallFromJs( JSRegExp regexp_obj = JSRegExp::cast(Object(regexp)); - return ExecRaw(isolate, regexp_obj, subject_string, output_registers, - output_register_count, start_position); + return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string, + output_registers, output_register_count, start_position); } MaybeHandle<Object> ExperimentalRegExp::Exec( Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int subject_index, Handle<RegExpMatchInfo> last_match_info) { DCHECK(FLAG_enable_experimental_regexp_engine); - DCHECK_EQ(regexp->TypeTag(), JSRegExp::EXPERIMENTAL); #ifdef VERIFY_HEAP regexp->JSRegExpVerify(isolate); @@ -197,16 +239,78 @@ MaybeHandle<Object> ExperimentalRegExp::Exec( output_registers_release.reset(output_registers); } - int num_matches = ExecRaw(isolate, *regexp, *subject, output_registers, - output_register_count, subject_index); + int num_matches = + ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject, + output_registers, output_register_count, subject_index); - if (num_matches == 0) { + if (num_matches > 0) { + DCHECK_EQ(num_matches, 1); + return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, + capture_count, output_registers); + } else if (num_matches == 0) { return isolate->factory()->null_value(); } else { + DCHECK_LT(num_matches, 0); + DCHECK(isolate->has_pending_exception()); + return MaybeHandle<Object>(); + } +} + +int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate, + Handle<JSRegExp> regexp, + Handle<String> subject, + int32_t* output_registers, + int32_t output_register_count, + int32_t subject_index) { + DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); + + if (FLAG_trace_experimental_regexp_engine) { + StdoutStream{} << "Experimental execution (oneshot) of regexp " + << regexp->Pattern() << std::endl; + } + + base::Optional<CompilationResult> compilation_result = + CompileImpl(isolate, regexp); + if (!compilation_result.has_value()) return RegExp::kInternalRegExpException; + + DisallowHeapAllocation no_gc; + return ExecRawImpl(isolate, RegExp::kFromRuntime, + *compilation_result->bytecode, *subject, + regexp->CaptureCount(), output_registers, + output_register_count, subject_index); +} + +MaybeHandle<Object> ExperimentalRegExp::OneshotExec( + Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, + int subject_index, Handle<RegExpMatchInfo> last_match_info) { + DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks); + DCHECK_NE(regexp->TypeTag(), JSRegExp::NOT_COMPILED); + + int capture_count = regexp->CaptureCount(); + int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count); + + int32_t* output_registers; + std::unique_ptr<int32_t[]> output_registers_release; + if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) { + output_registers = isolate->jsregexp_static_offsets_vector(); + } else { + output_registers = NewArray<int32_t>(output_register_count); + output_registers_release.reset(output_registers); + } + + int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers, + output_register_count, subject_index); + + if (num_matches > 0) { DCHECK_EQ(num_matches, 1); return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, output_registers); - return last_match_info; + } else if (num_matches == 0) { + return isolate->factory()->null_value(); + } else { + DCHECK_LT(num_matches, 0); + DCHECK(isolate->has_pending_exception()); + return MaybeHandle<Object>(); } } diff --git a/deps/v8/src/regexp/experimental/experimental.h b/deps/v8/src/regexp/experimental/experimental.h index 02f535f621..a0ee8d1081 100644 --- a/deps/v8/src/regexp/experimental/experimental.h +++ b/deps/v8/src/regexp/experimental/experimental.h @@ -39,10 +39,22 @@ class ExperimentalRegExp final : public AllStatic { static MaybeHandle<Object> Exec(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int index, Handle<RegExpMatchInfo> last_match_info); - static int32_t ExecRaw(Isolate* isolate, JSRegExp regexp, String subject, + static int32_t ExecRaw(Isolate* isolate, RegExp::CallOrigin call_origin, + JSRegExp regexp, String subject, int32_t* output_registers, int32_t output_register_count, int32_t subject_index); + // Compile and execute a regexp with the experimental engine, regardless of + // its type tag. The regexp itself is not changed (apart from lastIndex). + static MaybeHandle<Object> OneshotExec( + Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, + int index, Handle<RegExpMatchInfo> last_match_info); + static int32_t OneshotExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, + Handle<String> subject, + int32_t* output_registers, + int32_t output_register_count, + int32_t subject_index); + static constexpr bool kSupportsUnicode = false; }; diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc index 2135e977a7..27c1300ced 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc @@ -116,6 +116,7 @@ RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() { exit_label_.Unuse(); check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); + fallback_label_.Unuse(); } @@ -148,8 +149,13 @@ void RegExpMacroAssemblerIA32::Backtrack() { __ cmp(Operand(ebp, kBacktrackCount), Immediate(backtrack_limit())); __ j(not_equal, &next); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ jmp(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -940,6 +946,12 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { __ jmp(&return_eax); } + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ mov(eax, FALLBACK_TO_EXPERIMENTAL); + __ jmp(&return_eax); + } + CodeDesc code_desc; masm_->GetCode(masm_->isolate(), &code_desc); Handle<Code> code = diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h index a30bff29a1..0cb29979d7 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h @@ -192,6 +192,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32 Label exit_label_; Label check_preempt_label_; Label stack_overflow_label_; + Label fallback_label_; }; } // namespace internal diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc index db79011284..e1b1119c17 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc @@ -129,6 +129,7 @@ RegExpMacroAssemblerMIPS::~RegExpMacroAssemblerMIPS() { check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); internal_failure_label_.Unuse(); + fallback_label_.Unuse(); } @@ -165,8 +166,13 @@ void RegExpMacroAssemblerMIPS::Backtrack() { __ Sw(a0, MemOperand(frame_pointer(), kBacktrackCount)); __ Branch(&next, ne, a0, Operand(backtrack_limit())); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ jmp(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -910,6 +916,12 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ li(v0, Operand(EXCEPTION)); __ jmp(&return_v0); } + + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ li(v0, Operand(FALLBACK_TO_EXPERIMENTAL)); + __ jmp(&return_v0); + } } CodeDesc code_desc; diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h index e2aea1b091..dd1c27a7db 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h @@ -211,6 +211,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS Label check_preempt_label_; Label stack_overflow_label_; Label internal_failure_label_; + Label fallback_label_; }; } // namespace internal diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc index 309cebfcb9..48252a206e 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc @@ -165,6 +165,7 @@ RegExpMacroAssemblerMIPS::~RegExpMacroAssemblerMIPS() { check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); internal_failure_label_.Unuse(); + fallback_label_.Unuse(); } @@ -201,8 +202,13 @@ void RegExpMacroAssemblerMIPS::Backtrack() { __ Sd(a0, MemOperand(frame_pointer(), kBacktrackCount)); __ Branch(&next, ne, a0, Operand(backtrack_limit())); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ jmp(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -946,6 +952,12 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ li(v0, Operand(EXCEPTION)); __ jmp(&return_v0); } + + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ li(v0, Operand(FALLBACK_TO_EXPERIMENTAL)); + __ jmp(&return_v0); + } } CodeDesc code_desc; diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h index aebfec1060..b9a29ca010 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h @@ -216,6 +216,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS Label check_preempt_label_; Label stack_overflow_label_; Label internal_failure_label_; + Label fallback_label_; }; } // namespace internal diff --git a/deps/v8/src/regexp/ppc/OWNERS b/deps/v8/src/regexp/ppc/OWNERS index 6edd45a6ef..02c2cd757c 100644 --- a/deps/v8/src/regexp/ppc/OWNERS +++ b/deps/v8/src/regexp/ppc/OWNERS @@ -2,3 +2,4 @@ junyan@redhat.com joransiu@ca.ibm.com midawson@redhat.com mfarazma@redhat.com +vasili.skurydzin@ibm.com diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc index 0b1c9a99b7..c0d69297f9 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc @@ -136,6 +136,7 @@ RegExpMacroAssemblerPPC::~RegExpMacroAssemblerPPC() { check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); internal_failure_label_.Unuse(); + fallback_label_.Unuse(); } @@ -176,11 +177,17 @@ void RegExpMacroAssemblerPPC::Backtrack() { __ LoadP(r3, MemOperand(frame_pointer(), kBacktrackCount), r0); __ addi(r3, r3, Operand(1)); __ StoreP(r3, MemOperand(frame_pointer(), kBacktrackCount), r0); - __ cmpi(r3, Operand(backtrack_limit())); + __ mov(r0, Operand(backtrack_limit())); + __ cmp(r3, r0); __ bne(&next); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ b(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -952,6 +959,12 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) { __ li(r3, Operand(EXCEPTION)); __ b(&return_r3); } + + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ li(r3, Operand(FALLBACK_TO_EXPERIMENTAL)); + __ b(&return_r3); + } } CodeDesc code_desc; @@ -1140,7 +1153,6 @@ void RegExpMacroAssemblerPPC::CallCheckStackGuardState(Register scratch) { __ mov(ip, Operand(stack_guard_check)); EmbeddedData d = EmbeddedData::FromBlob(); - CHECK(Builtins::IsIsolateIndependent(Builtins::kDirectCEntry)); Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry); __ mov(r0, Operand(entry, RelocInfo::OFF_HEAP_TARGET)); __ Call(r0); diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h index f6b959837f..18b7c5b110 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h @@ -197,6 +197,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC Label check_preempt_label_; Label stack_overflow_label_; Label internal_failure_label_; + Label fallback_label_; }; // Set of non-volatile registers saved/restored by generated regexp code. diff --git a/deps/v8/src/regexp/regexp-bytecode-generator.cc b/deps/v8/src/regexp/regexp-bytecode-generator.cc index 8abd15384e..262d788068 100644 --- a/deps/v8/src/regexp/regexp-bytecode-generator.cc +++ b/deps/v8/src/regexp/regexp-bytecode-generator.cc @@ -132,7 +132,11 @@ void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); } void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); } -void RegExpBytecodeGenerator::Backtrack() { Emit(BC_POP_BT, 0); } +void RegExpBytecodeGenerator::Backtrack() { + int error_code = + can_fallback() ? RegExp::RE_FALLBACK_TO_EXPERIMENTAL : RegExp::RE_FAILURE; + Emit(BC_POP_BT, error_code); +} void RegExpBytecodeGenerator::GoTo(Label* l) { if (advance_current_end_ == pc_) { @@ -368,7 +372,7 @@ void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index, Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) { Bind(&backtrack_); - Emit(BC_POP_BT, 0); + Backtrack(); Handle<ByteArray> array; if (FLAG_regexp_peephole_optimization) { diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index ce1197a55b..fe032bcfdd 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -1777,10 +1777,11 @@ class LoopInitializationMarker { DCHECK(node_->traversed_loop_initialization_node_); node_->traversed_loop_initialization_node_ = false; } + LoopInitializationMarker(const LoopInitializationMarker&) = delete; + LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete; private: LoopChoiceNode* node_; - DISALLOW_COPY_AND_ASSIGN(LoopInitializationMarker); }; // Temporarily decrements min_loop_iterations_. @@ -1791,10 +1792,11 @@ class IterationDecrementer { --node_->min_loop_iterations_; } ~IterationDecrementer() { ++node_->min_loop_iterations_; } + IterationDecrementer(const IterationDecrementer&) = delete; + IterationDecrementer& operator=(const IterationDecrementer&) = delete; private: LoopChoiceNode* node_; - DISALLOW_COPY_AND_ASSIGN(IterationDecrementer); }; RegExpNode* SeqRegExpNode::FilterOneByte(int depth) { diff --git a/deps/v8/src/regexp/regexp-error.h b/deps/v8/src/regexp/regexp-error.h index 6145b404ab..628f93638e 100644 --- a/deps/v8/src/regexp/regexp-error.h +++ b/deps/v8/src/regexp/regexp-error.h @@ -30,6 +30,7 @@ namespace internal { T(InvalidQuantifier, "Invalid quantifier") \ T(InvalidGroup, "Invalid group") \ T(MultipleFlagDashes, "Multiple dashes in flag group") \ + T(NotLinear, "Cannot be executed in linear time") \ T(RepeatedFlag, "Repeated flag in flag group") \ T(InvalidFlagGroup, "Invalid flag group") \ T(TooManyCaptures, "Too many captures") \ diff --git a/deps/v8/src/regexp/regexp-interpreter.cc b/deps/v8/src/regexp/regexp-interpreter.cc index 80442a8db6..a73a9d3fcc 100644 --- a/deps/v8/src/regexp/regexp-interpreter.cc +++ b/deps/v8/src/regexp/regexp-interpreter.cc @@ -125,6 +125,8 @@ uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) { class BacktrackStack { public: BacktrackStack() = default; + BacktrackStack(const BacktrackStack&) = delete; + BacktrackStack& operator=(const BacktrackStack&) = delete; V8_WARN_UNUSED_RESULT bool push(int v) { data_.emplace_back(v); @@ -157,8 +159,6 @@ class BacktrackStack { static constexpr int kMaxSize = RegExpStack::kMaximumStackSize / sizeof(ValueT); - - DISALLOW_COPY_AND_ASSIGN(BacktrackStack); }; // Registers used during interpreter execution. These consist of output @@ -521,8 +521,8 @@ IrregexpInterpreter::Result RawMatch( BYTECODE(POP_BT) { STATIC_ASSERT(JSRegExp::kNoBacktrackLimit == 0); if (++backtrack_count == backtrack_limit) { - // Exceeded limits are treated as a failed match. - return IrregexpInterpreter::FAILURE; + int return_code = LoadPacked24Signed(insn); + return static_cast<IrregexpInterpreter::Result>(return_code); } IrregexpInterpreter::Result return_code = diff --git a/deps/v8/src/regexp/regexp-interpreter.h b/deps/v8/src/regexp/regexp-interpreter.h index be96476443..9b4a8c6c30 100644 --- a/deps/v8/src/regexp/regexp-interpreter.h +++ b/deps/v8/src/regexp/regexp-interpreter.h @@ -19,6 +19,7 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic { SUCCESS = RegExp::kInternalRegExpSuccess, EXCEPTION = RegExp::kInternalRegExpException, RETRY = RegExp::kInternalRegExpRetry, + FALLBACK_TO_EXPERIMENTAL = RegExp::kInternalRegExpFallbackToExperimental, }; // In case a StackOverflow occurs, a StackOverflowException is created and diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc index cf4346309e..62a72b1661 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler.cc @@ -315,7 +315,7 @@ int NativeRegExpMacroAssembler::Execute( int result = fn.Call(input.ptr(), start_offset, input_start, input_end, output, output_size, stack_base, call_origin, isolate, regexp.ptr()); - DCHECK(result >= RETRY); + DCHECK_GE(result, SMALLEST_REGEXP_RESULT); if (result == EXCEPTION && !isolate->has_pending_exception()) { // We detected a stack overflow (on the backtrack stack) in RegExp code, diff --git a/deps/v8/src/regexp/regexp-macro-assembler.h b/deps/v8/src/regexp/regexp-macro-assembler.h index 52465610cb..f1dc57db64 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.h +++ b/deps/v8/src/regexp/regexp-macro-assembler.h @@ -183,10 +183,19 @@ class RegExpMacroAssembler { void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; } bool slow_safe() { return slow_safe_compiler_; } + // Controls after how many backtracks irregexp should abort execution. If it + // can fall back to the experimental engine (see `set_can_fallback`), it will + // return the appropriate error code, otherwise it will return the number of + // matches found so far (perhaps none). void set_backtrack_limit(uint32_t backtrack_limit) { backtrack_limit_ = backtrack_limit; } + // Set whether or not irregexp can fall back to the experimental engine on + // excessive backtracking. The number of backtracks considered excessive can + // be controlled with set_backtrack_limit. + void set_can_fallback(bool val) { can_fallback_ = val; } + enum GlobalMode { NOT_GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK, @@ -211,9 +220,12 @@ class RegExpMacroAssembler { } uint32_t backtrack_limit() const { return backtrack_limit_; } + bool can_fallback() const { return can_fallback_; } + private: bool slow_safe_compiler_; uint32_t backtrack_limit_ = JSRegExp::kNoBacktrackLimit; + bool can_fallback_ = false; GlobalMode global_mode_; Isolate* isolate_; Zone* zone_; @@ -228,16 +240,20 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler { // RETRY: Something significant changed during execution, and the matching // should be retried from scratch. // EXCEPTION: Something failed during execution. If no exception has been - // thrown, it's an internal out-of-memory, and the caller should - // throw the exception. + // thrown, it's an internal out-of-memory, and the caller should + // throw the exception. // FAILURE: Matching failed. // SUCCESS: Matching succeeded, and the output array has been filled with - // capture positions. + // capture positions. + // FALLBACK_TO_EXPERIMENTAL: Execute the regexp on this subject using the + // experimental engine instead. enum Result { FAILURE = RegExp::kInternalRegExpFailure, SUCCESS = RegExp::kInternalRegExpSuccess, EXCEPTION = RegExp::kInternalRegExpException, RETRY = RegExp::kInternalRegExpRetry, + FALLBACK_TO_EXPERIMENTAL = RegExp::kInternalRegExpFallbackToExperimental, + SMALLEST_REGEXP_RESULT = RegExp::kInternalRegExpSmallestResult, }; NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone); diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc index fa58764aaa..622baadc07 100644 --- a/deps/v8/src/regexp/regexp-parser.cc +++ b/deps/v8/src/regexp/regexp-parser.cc @@ -1829,15 +1829,6 @@ bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, return success; } -bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone, - FlatStringReader* input, - JSRegExp::Flags flags, - RegExpCompileData* result, - const DisallowHeapAllocation& no_gc) { - RegExpParser parser(input, flags, isolate, zone); - return parser.Parse(result, no_gc); -} - RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags) : zone_(zone), pending_empty_(false), diff --git a/deps/v8/src/regexp/regexp-parser.h b/deps/v8/src/regexp/regexp-parser.h index 74b653b47e..23afe9f939 100644 --- a/deps/v8/src/regexp/regexp-parser.h +++ b/deps/v8/src/regexp/regexp-parser.h @@ -159,10 +159,6 @@ class V8_EXPORT_PRIVATE RegExpParser { static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, JSRegExp::Flags flags, RegExpCompileData* result); - static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone, - FlatStringReader* input, JSRegExp::Flags flags, - RegExpCompileData* result, - const DisallowHeapAllocation& no_gc); private: bool Parse(RegExpCompileData* result, const DisallowHeapAllocation&); diff --git a/deps/v8/src/regexp/regexp-stack.cc b/deps/v8/src/regexp/regexp-stack.cc index 7f47aec5ae..9a80f6f211 100644 --- a/deps/v8/src/regexp/regexp-stack.cc +++ b/deps/v8/src/regexp/regexp-stack.cc @@ -14,12 +14,18 @@ RegExpStackScope::RegExpStackScope(Isolate* isolate) : regexp_stack_(isolate->regexp_stack()) { // Initialize, if not already initialized. regexp_stack_->EnsureCapacity(0); + // Irregexp is not reentrant in several ways; in particular, the + // RegExpStackScope is not reentrant since the destructor frees allocated + // memory. Protect against reentrancy here. + CHECK(!regexp_stack_->is_in_use()); + regexp_stack_->set_is_in_use(true); } RegExpStackScope::~RegExpStackScope() { // Reset the buffer if it has grown. regexp_stack_->Reset(); + DCHECK(!regexp_stack_->is_in_use()); } RegExpStack::RegExpStack() : thread_local_(this), isolate_(nullptr) {} @@ -36,17 +42,15 @@ char* RegExpStack::ArchiveStack(char* to) { DCHECK(thread_local_.owns_memory_); } - size_t size = sizeof(thread_local_); - MemCopy(reinterpret_cast<void*>(to), &thread_local_, size); + MemCopy(reinterpret_cast<void*>(to), &thread_local_, kThreadLocalSize); thread_local_ = ThreadLocal(this); - return to + size; + return to + kThreadLocalSize; } char* RegExpStack::RestoreStack(char* from) { - size_t size = sizeof(thread_local_); - MemCopy(&thread_local_, reinterpret_cast<void*>(from), size); - return from + size; + MemCopy(&thread_local_, reinterpret_cast<void*>(from), kThreadLocalSize); + return from + kThreadLocalSize; } void RegExpStack::Reset() { thread_local_.ResetToStaticStack(this); } @@ -60,6 +64,7 @@ void RegExpStack::ThreadLocal::ResetToStaticStack(RegExpStack* regexp_stack) { limit_ = reinterpret_cast<Address>(regexp_stack->static_stack_) + kStackLimitSlack * kSystemPointerSize; owns_memory_ = false; + is_in_use_ = false; } void RegExpStack::ThreadLocal::FreeAndInvalidate() { diff --git a/deps/v8/src/regexp/regexp-stack.h b/deps/v8/src/regexp/regexp-stack.h index 9394398fcc..25a213e471 100644 --- a/deps/v8/src/regexp/regexp-stack.h +++ b/deps/v8/src/regexp/regexp-stack.h @@ -26,13 +26,13 @@ class RegExpStackScope { // Initializes the stack memory area if necessary. explicit RegExpStackScope(Isolate* isolate); ~RegExpStackScope(); // Releases the stack if it has grown. + RegExpStackScope(const RegExpStackScope&) = delete; + RegExpStackScope& operator=(const RegExpStackScope&) = delete; RegExpStack* stack() const { return regexp_stack_; } private: RegExpStack* regexp_stack_; - - DISALLOW_COPY_AND_ASSIGN(RegExpStackScope); }; @@ -40,6 +40,8 @@ class RegExpStack { public: RegExpStack(); ~RegExpStack(); + RegExpStack(const RegExpStack&) = delete; + RegExpStack& operator=(const RegExpStack&) = delete; // Number of allocated locations on the stack below the limit. // No sequence of pushes must be longer that this without doing a stack-limit @@ -68,9 +70,12 @@ class RegExpStack { // If passing zero, the default/minimum size buffer is allocated. Address EnsureCapacity(size_t size); + bool is_in_use() const { return thread_local_.is_in_use_; } + void set_is_in_use(bool v) { thread_local_.is_in_use_ = v; } + // Thread local archiving. static constexpr int ArchiveSpacePerThread() { - return static_cast<int>(sizeof(ThreadLocal)); + return static_cast<int>(kThreadLocalSize); } char* ArchiveStack(char* to); char* RestoreStack(char* from); @@ -112,10 +117,12 @@ class RegExpStack { size_t memory_size_ = 0; Address limit_ = kNullAddress; bool owns_memory_ = false; // Whether memory_ is owned and must be freed. + bool is_in_use_ = false; // To guard against reentrancy. void ResetToStaticStack(RegExpStack* regexp_stack); void FreeAndInvalidate(); }; + static constexpr size_t kThreadLocalSize = sizeof(ThreadLocal); // Address of top of memory used as stack. Address memory_top_address_address() { @@ -133,8 +140,6 @@ class RegExpStack { friend class ExternalReference; friend class Isolate; friend class RegExpStackScope; - - DISALLOW_COPY_AND_ASSIGN(RegExpStack); }; } // namespace internal diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc index 556edbdac8..07d1b5d8f3 100644 --- a/deps/v8/src/regexp/regexp-utils.cc +++ b/deps/v8/src/regexp/regexp-utils.cc @@ -173,9 +173,10 @@ bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) { // with the init order in the bootstrapper). InternalIndex kExecIndex(JSRegExp::kExecFunctionDescriptorIndex); DCHECK_EQ(*(isolate->factory()->exec_string()), - proto_map.instance_descriptors().GetKey(kExecIndex)); - if (proto_map.instance_descriptors().GetDetails(kExecIndex).constness() != - PropertyConstness::kConst) { + proto_map.instance_descriptors(kRelaxedLoad).GetKey(kExecIndex)); + if (proto_map.instance_descriptors(kRelaxedLoad) + .GetDetails(kExecIndex) + .constness() != PropertyConstness::kConst) { return false; } diff --git a/deps/v8/src/regexp/regexp.cc b/deps/v8/src/regexp/regexp.cc index 569acdab48..b62ad1fff8 100644 --- a/deps/v8/src/regexp/regexp.cc +++ b/deps/v8/src/regexp/regexp.cc @@ -17,6 +17,7 @@ #include "src/regexp/regexp-macro-assembler-arch.h" #include "src/regexp/regexp-macro-assembler-tracer.h" #include "src/regexp/regexp-parser.h" +#include "src/regexp/regexp-utils.h" #include "src/strings/string-search.h" #include "src/utils/ostreams.h" @@ -88,7 +89,7 @@ class RegExpImpl final : public AllStatic { static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input, JSRegExp::Flags flags, Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte, - uint32_t backtrack_limit); + uint32_t& backtrack_limit); // For acting on the JSRegExp data FixedArray. static int IrregexpMaxRegisterCount(FixedArray re); @@ -119,6 +120,10 @@ void RegExp::ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, error_text)); } +bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) { + return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp); +} + // Identifies the sort of regexps where the regexp engine is faster // than the code used for atom matches. static bool HasFewDifferentCharacters(Handle<String> pattern) { @@ -182,9 +187,22 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re, bool has_been_compiled = false; - if (FLAG_enable_experimental_regexp_engine && + if (FLAG_default_to_experimental_regexp_engine && ExperimentalRegExp::CanBeHandled(parse_result.tree, flags, parse_result.capture_count)) { + DCHECK(FLAG_enable_experimental_regexp_engine); + ExperimentalRegExp::Initialize(isolate, re, pattern, flags, + parse_result.capture_count); + has_been_compiled = true; + } else if (flags & JSRegExp::kLinear) { + DCHECK(FLAG_enable_experimental_regexp_engine); + if (!ExperimentalRegExp::CanBeHandled(parse_result.tree, flags, + parse_result.capture_count)) { + // TODO(mbid): The error could provide a reason for why the regexp can't + // be executed in linear time (e.g. due to back references). + return RegExp::ThrowRegExpException(isolate, re, pattern, + RegExpError::kNotLinear); + } ExperimentalRegExp::Initialize(isolate, re, pattern, flags, parse_result.capture_count); has_been_compiled = true; @@ -248,6 +266,14 @@ bool RegExp::EnsureFullyCompiled(Isolate* isolate, Handle<JSRegExp> re, } // static +MaybeHandle<Object> RegExp::ExperimentalOneshotExec( + Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, + int index, Handle<RegExpMatchInfo> last_match_info) { + return ExperimentalRegExp::OneshotExec(isolate, regexp, subject, index, + last_match_info); +} + +// static MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int index, Handle<RegExpMatchInfo> last_match_info) { @@ -450,9 +476,10 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, compile_data.compilation_target = re->ShouldProduceBytecode() ? RegExpCompilationTarget::kBytecode : RegExpCompilationTarget::kNative; + uint32_t backtrack_limit = re->BacktrackLimit(); const bool compilation_succeeded = Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject, - is_one_byte, re->BacktrackLimit()); + is_one_byte, backtrack_limit); if (!compilation_succeeded) { DCHECK(compile_data.error != RegExpError::kNone); RegExp::ThrowRegExpException(isolate, re, compile_data.error); @@ -482,6 +509,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, if (compile_data.register_count > register_max) { SetIrregexpMaxRegisterCount(*data, compile_data.register_count); } + data->set(JSRegExp::kIrregexpBacktrackLimit, Smi::FromInt(backtrack_limit)); if (FLAG_trace_regexp_tier_up) { PrintF("JSRegExp object %p %s size: %d\n", @@ -595,6 +623,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, case IrregexpInterpreter::SUCCESS: case IrregexpInterpreter::EXCEPTION: case IrregexpInterpreter::FAILURE: + case IrregexpInterpreter::FALLBACK_TO_EXPERIMENTAL: return result; case IrregexpInterpreter::RETRY: // The string has changed representation, and we must restart the @@ -665,13 +694,16 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec( int capture_count = regexp->CaptureCount(); return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count, output_registers); - } - if (res == RegExp::RE_EXCEPTION) { + } else if (res == RegExp::RE_FALLBACK_TO_EXPERIMENTAL) { + return ExperimentalRegExp::OneshotExec(isolate, regexp, subject, + previous_index, last_match_info); + } else if (res == RegExp::RE_EXCEPTION) { DCHECK(isolate->has_pending_exception()); return MaybeHandle<Object>(); + } else { + DCHECK(res == RegExp::RE_FAILURE); + return isolate->factory()->null_value(); } - DCHECK(res == RegExp::RE_FAILURE); - return isolate->factory()->null_value(); } // static @@ -740,15 +772,15 @@ bool RegExp::CompileForTesting(Isolate* isolate, Zone* zone, Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte) { + uint32_t backtrack_limit = JSRegExp::kNoBacktrackLimit; return RegExpImpl::Compile(isolate, zone, data, flags, pattern, - sample_subject, is_one_byte, - JSRegExp::kNoBacktrackLimit); + sample_subject, is_one_byte, backtrack_limit); } bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, JSRegExp::Flags flags, Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte, - uint32_t backtrack_limit) { + uint32_t& backtrack_limit) { if (JSRegExp::RegistersForCaptureCount(data->capture_count) > RegExpMacroAssembler::kMaxRegisterCount) { data->error = RegExpError::kTooLarge; @@ -825,7 +857,21 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, } macro_assembler->set_slow_safe(TooMuchRegExpCode(isolate, pattern)); - macro_assembler->set_backtrack_limit(backtrack_limit); + if (FLAG_enable_experimental_regexp_engine_on_excessive_backtracks && + ExperimentalRegExp::CanBeHandled(data->tree, flags, + data->capture_count)) { + if (backtrack_limit == JSRegExp::kNoBacktrackLimit) { + backtrack_limit = FLAG_regexp_backtracks_before_fallback; + } else { + backtrack_limit = + std::min(backtrack_limit, FLAG_regexp_backtracks_before_fallback); + } + macro_assembler->set_backtrack_limit(backtrack_limit); + macro_assembler->set_can_fallback(true); + } else { + macro_assembler->set_backtrack_limit(backtrack_limit); + macro_assembler->set_can_fallback(false); + } // Inserted here, instead of in Assembler, because it depends on information // in the AST that isn't replicated in the Node structure. @@ -1014,8 +1060,8 @@ int32_t* RegExpGlobalCache::FetchNext() { DCHECK(ExperimentalRegExp::IsCompiled(regexp_, isolate_)); DisallowHeapAllocation no_gc; num_matches_ = ExperimentalRegExp::ExecRaw( - isolate_, *regexp_, *subject_, register_array_, - register_array_size_, last_end_index); + isolate_, RegExp::kFromRuntime, *regexp_, *subject_, + register_array_, register_array_size_, last_end_index); break; } case JSRegExp::IRREGEXP: { @@ -1035,7 +1081,16 @@ int32_t* RegExpGlobalCache::FetchNext() { } } - if (num_matches_ <= 0) return nullptr; + // Fall back to experimental engine if needed and possible. + if (num_matches_ == RegExp::kInternalRegExpFallbackToExperimental) { + num_matches_ = ExperimentalRegExp::OneshotExecRaw( + isolate_, regexp_, subject_, register_array_, register_array_size_, + last_end_index); + } + + if (num_matches_ <= 0) { + return nullptr; + } current_match_index_ = 0; return register_array_; } else { diff --git a/deps/v8/src/regexp/regexp.h b/deps/v8/src/regexp/regexp.h index a6a3a8f003..3e20b5f80c 100644 --- a/deps/v8/src/regexp/regexp.h +++ b/deps/v8/src/regexp/regexp.h @@ -92,16 +92,25 @@ class RegExp final : public AllStatic { Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, int index, Handle<RegExpMatchInfo> last_match_info); + V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> + ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp, + Handle<String> subject, int index, + Handle<RegExpMatchInfo> last_match_info); + // Integral return values used throughout regexp code layers. static constexpr int kInternalRegExpFailure = 0; static constexpr int kInternalRegExpSuccess = 1; static constexpr int kInternalRegExpException = -1; static constexpr int kInternalRegExpRetry = -2; + static constexpr int kInternalRegExpFallbackToExperimental = -3; + static constexpr int kInternalRegExpSmallestResult = -3; enum IrregexpResult : int32_t { RE_FAILURE = kInternalRegExpFailure, RE_SUCCESS = kInternalRegExpSuccess, RE_EXCEPTION = kInternalRegExpException, + RE_RETRY = kInternalRegExpRetry, + RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental, }; // Set last match info. If match is nullptr, then setting captures is @@ -129,6 +138,8 @@ class RegExp final : public AllStatic { RegExpError error); static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, RegExpError error_text); + + static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp); }; // Uses a special global mode of irregexp-generated code to perform a global diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc index b574be8d74..9d2e62e1cb 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc @@ -137,6 +137,7 @@ RegExpMacroAssemblerS390::~RegExpMacroAssemblerS390() { check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); internal_failure_label_.Unuse(); + fallback_label_.Unuse(); } int RegExpMacroAssemblerS390::stack_limit_slack() { @@ -174,8 +175,13 @@ void RegExpMacroAssemblerS390::Backtrack() { __ CmpLogicalP(r2, Operand(backtrack_limit())); __ bne(&next); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ jmp(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -949,6 +955,12 @@ Handle<HeapObject> RegExpMacroAssemblerS390::GetCode(Handle<String> source) { __ b(&return_r2); } + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ LoadImmP(r2, Operand(FALLBACK_TO_EXPERIMENTAL)); + __ b(&return_r2); + } + CodeDesc code_desc; masm_->GetCode(isolate(), &code_desc); Handle<Code> code = diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h index e4f88f51b9..a01d409279 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.h @@ -197,6 +197,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390 Label check_preempt_label_; Label stack_overflow_label_; Label internal_failure_label_; + Label fallback_label_; }; // Set of non-volatile registers saved/restored by generated regexp code. diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc index da0397689f..79574ca993 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc @@ -125,6 +125,7 @@ RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() { exit_label_.Unuse(); check_preempt_label_.Unuse(); stack_overflow_label_.Unuse(); + fallback_label_.Unuse(); } @@ -157,8 +158,13 @@ void RegExpMacroAssemblerX64::Backtrack() { __ cmpq(Operand(rbp, kBacktrackCount), Immediate(backtrack_limit())); __ j(not_equal, &next); - // Exceeded limits are treated as a failed match. - Fail(); + // Backtrack limit exceeded. + if (can_fallback()) { + __ jmp(&fallback_label_); + } else { + // Can't fallback, so we treat it as a failed match. + Fail(); + } __ bind(&next); } @@ -1000,6 +1006,12 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { __ jmp(&return_rax); } + if (fallback_label_.is_linked()) { + __ bind(&fallback_label_); + __ Set(rax, FALLBACK_TO_EXPERIMENTAL); + __ jmp(&return_rax); + } + FixupCodeRelativePositions(); CodeDesc code_desc; diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h index ea4d45edba..517a05d939 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h @@ -248,6 +248,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64 Label exit_label_; Label check_preempt_label_; Label stack_overflow_label_; + Label fallback_label_; }; } // namespace internal |