diff options
Diffstat (limited to 'deps/v8/src/maglev/maglev-code-generator.cc')
-rw-r--r-- | deps/v8/src/maglev/maglev-code-generator.cc | 779 |
1 files changed, 423 insertions, 356 deletions
diff --git a/deps/v8/src/maglev/maglev-code-generator.cc b/deps/v8/src/maglev/maglev-code-generator.cc index a20fd22110..1c221459f7 100644 --- a/deps/v8/src/maglev/maglev-code-generator.cc +++ b/deps/v8/src/maglev/maglev-code-generator.cc @@ -17,6 +17,7 @@ #include "src/common/globals.h" #include "src/compiler/backend/instruction.h" #include "src/deoptimizer/deoptimize-reason.h" +#include "src/deoptimizer/deoptimizer.h" #include "src/deoptimizer/translation-array.h" #include "src/execution/frame-constants.h" #include "src/interpreter/bytecode-register.h" @@ -44,16 +45,16 @@ template <typename RegisterT> struct RegisterTHelper; template <> struct RegisterTHelper<Register> { - static constexpr Register kScratch = kScratchRegister; static constexpr RegList kAllocatableRegisters = kAllocatableGeneralRegisters; }; template <> struct RegisterTHelper<DoubleRegister> { - static constexpr DoubleRegister kScratch = kScratchDoubleReg; static constexpr DoubleRegList kAllocatableRegisters = kAllocatableDoubleRegisters; }; +enum NeedsDecompression { kDoesNotNeedDecompression, kNeedsDecompression }; + // The ParallelMoveResolver is used to resolve multiple moves between registers // and stack slots that are intended to happen, semantically, in parallel. It // finds chains of moves that would clobber each other, and emits them in a non @@ -86,33 +87,39 @@ struct RegisterTHelper<DoubleRegister> { // It additionally keeps track of materialising moves, which don't have a stack // slot but rather materialise a value from, e.g., a constant. These can safely // be emitted at the end, once all the parallel moves are done. -template <typename RegisterT> +template <typename RegisterT, bool DecompressIfNeeded> class ParallelMoveResolver { - static constexpr RegisterT kScratchRegT = - RegisterTHelper<RegisterT>::kScratch; - static constexpr auto kAllocatableRegistersT = RegisterTHelper<RegisterT>::kAllocatableRegisters; + static_assert(!DecompressIfNeeded || std::is_same_v<Register, RegisterT>); public: - explicit ParallelMoveResolver(MaglevAssembler* masm) : masm_(masm) {} + explicit ParallelMoveResolver(MaglevAssembler* masm) + : masm_(masm), scratch_(RegisterT::no_reg()) {} void RecordMove(ValueNode* source_node, compiler::InstructionOperand source, - compiler::AllocatedOperand target) { - if (target.IsRegister()) { - RecordMoveToRegister(source_node, source, ToRegisterT<RegisterT>(target)); + compiler::AllocatedOperand target, + bool target_needs_to_be_decompressed) { + if (target.IsAnyRegister()) { + RecordMoveToRegister(source_node, source, ToRegisterT<RegisterT>(target), + target_needs_to_be_decompressed); } else { RecordMoveToStackSlot(source_node, source, - masm_->GetFramePointerOffsetForStackSlot(target)); + masm_->GetFramePointerOffsetForStackSlot(target), + target_needs_to_be_decompressed); } } void RecordMove(ValueNode* source_node, compiler::InstructionOperand source, - RegisterT target_reg) { - RecordMoveToRegister(source_node, source, target_reg); + RegisterT target_reg, + NeedsDecompression target_needs_to_be_decompressed) { + RecordMoveToRegister(source_node, source, target_reg, + target_needs_to_be_decompressed); } - void EmitMoves() { + void EmitMoves(RegisterT scratch) { + DCHECK(!scratch_.is_valid()); + scratch_ = scratch; for (RegisterT reg : kAllocatableRegistersT) { StartEmitMoveChain(reg); ValueNode* materializing_register_move = @@ -128,8 +135,8 @@ class ParallelMoveResolver { StartEmitMoveChain(moves_from_stack_slot_.begin()->first); } for (auto [stack_slot, node] : materializing_stack_slot_moves_) { - node->LoadToRegister(masm_, kScratchRegT); - EmitStackMove(stack_slot, kScratchRegT); + node->LoadToRegister(masm_, scratch_); + __ Move(StackSlot{stack_slot}, scratch_); } } @@ -139,12 +146,25 @@ class ParallelMoveResolver { ParallelMoveResolver operator=(const ParallelMoveResolver&) = delete; private: - // The targets of moves from a source, i.e. the set of outgoing edges for a - // node in the move graph. + // For the GapMoveTargets::needs_decompression member when DecompressIfNeeded + // is false. + struct DummyNeedsDecompression { + // NOLINTNEXTLINE + DummyNeedsDecompression(NeedsDecompression) {} + }; + + // The targets of moves from a source, i.e. the set of outgoing edges for + // a node in the move graph. struct GapMoveTargets { + base::SmallVector<int32_t, 1> stack_slots = base::SmallVector<int32_t, 1>{}; RegListBase<RegisterT> registers; - base::SmallVector<uint32_t, 1> stack_slots = - base::SmallVector<uint32_t, 1>{}; + + // We only need this field for DecompressIfNeeded, otherwise use an empty + // dummy value. + V8_NO_UNIQUE_ADDRESS + std::conditional_t<DecompressIfNeeded, NeedsDecompression, + DummyNeedsDecompression> + needs_decompression = kDoesNotNeedDecompression; GapMoveTargets() = default; GapMoveTargets(GapMoveTargets&&) V8_NOEXCEPT = default; @@ -178,11 +198,11 @@ class ParallelMoveResolver { } } - void CheckNoExistingMoveToStackSlot(uint32_t target_slot) { - for (Register reg : kAllocatableRegistersT) { + void CheckNoExistingMoveToStackSlot(int32_t target_slot) { + for (RegisterT reg : kAllocatableRegistersT) { auto& stack_slots = moves_from_register_[reg.code()].stack_slots; if (std::any_of(stack_slots.begin(), stack_slots.end(), - [&](uint32_t slot) { return slot == target_slot; })) { + [&](int32_t slot) { return slot == target_slot; })) { FATAL("Existing move from %s to stack slot %d", RegisterName(reg), target_slot); } @@ -190,7 +210,7 @@ class ParallelMoveResolver { for (auto& [stack_slot, targets] : moves_from_stack_slot_) { auto& stack_slots = targets.stack_slots; if (std::any_of(stack_slots.begin(), stack_slots.end(), - [&](uint32_t slot) { return slot == target_slot; })) { + [&](int32_t slot) { return slot == target_slot; })) { FATAL("Existing move from stack slot %d to stack slot %d", stack_slot, target_slot); } @@ -204,51 +224,99 @@ class ParallelMoveResolver { } #else void CheckNoExistingMoveToRegister(RegisterT target_reg) {} - void CheckNoExistingMoveToStackSlot(uint32_t target_slot) {} + void CheckNoExistingMoveToStackSlot(int32_t target_slot) {} #endif void RecordMoveToRegister(ValueNode* node, compiler::InstructionOperand source, - RegisterT target_reg) { + RegisterT target_reg, + bool target_needs_to_be_decompressed) { // There shouldn't have been another move to this register already. CheckNoExistingMoveToRegister(target_reg); + NeedsDecompression needs_decompression = kDoesNotNeedDecompression; + if constexpr (DecompressIfNeeded) { + if (target_needs_to_be_decompressed && + !node->decompresses_tagged_result()) { + needs_decompression = kNeedsDecompression; + } + } else { + DCHECK_IMPLIES(target_needs_to_be_decompressed, + node->decompresses_tagged_result()); + } + + GapMoveTargets* targets; if (source.IsAnyRegister()) { RegisterT source_reg = ToRegisterT<RegisterT>(source); - if (target_reg != source_reg) { - moves_from_register_[source_reg.code()].registers.set(target_reg); + if (target_reg == source_reg) { + // We should never have a register aliasing case that needs + // decompression, since this path is only used by exception phis and + // they have no reg->reg moves. + DCHECK_EQ(needs_decompression, kDoesNotNeedDecompression); + return; } + targets = &moves_from_register_[source_reg.code()]; } else if (source.IsAnyStackSlot()) { - uint32_t source_slot = masm_->GetFramePointerOffsetForStackSlot( + int32_t source_slot = masm_->GetFramePointerOffsetForStackSlot( compiler::AllocatedOperand::cast(source)); - moves_from_stack_slot_[source_slot].registers.set(target_reg); + targets = &moves_from_stack_slot_[source_slot]; } else { DCHECK(source.IsConstant()); DCHECK(IsConstantNode(node->opcode())); materializing_register_moves_[target_reg.code()] = node; + // No need to update `targets.needs_decompression`, materialization is + // always decompressed. + return; + } + + targets->registers.set(target_reg); + if (needs_decompression == kNeedsDecompression) { + targets->needs_decompression = kNeedsDecompression; } } void RecordMoveToStackSlot(ValueNode* node, compiler::InstructionOperand source, - uint32_t target_slot) { + int32_t target_slot, + bool target_needs_to_be_decompressed) { // There shouldn't have been another move to this stack slot already. CheckNoExistingMoveToStackSlot(target_slot); + NeedsDecompression needs_decompression = kDoesNotNeedDecompression; + if constexpr (DecompressIfNeeded) { + if (target_needs_to_be_decompressed && + !node->decompresses_tagged_result()) { + needs_decompression = kNeedsDecompression; + } + } else { + DCHECK_IMPLIES(target_needs_to_be_decompressed, + node->decompresses_tagged_result()); + } + + GapMoveTargets* targets; if (source.IsAnyRegister()) { RegisterT source_reg = ToRegisterT<RegisterT>(source); - moves_from_register_[source_reg.code()].stack_slots.push_back( - target_slot); + targets = &moves_from_register_[source_reg.code()]; } else if (source.IsAnyStackSlot()) { - uint32_t source_slot = masm_->GetFramePointerOffsetForStackSlot( + int32_t source_slot = masm_->GetFramePointerOffsetForStackSlot( compiler::AllocatedOperand::cast(source)); - if (source_slot != target_slot) { - moves_from_stack_slot_[source_slot].stack_slots.push_back(target_slot); + if (source_slot == target_slot && + needs_decompression == kDoesNotNeedDecompression) { + return; } + targets = &moves_from_stack_slot_[source_slot]; } else { DCHECK(source.IsConstant()); DCHECK(IsConstantNode(node->opcode())); materializing_stack_slot_moves_.emplace_back(target_slot, node); + // No need to update `targets.needs_decompression`, materialization is + // always decompressed. + return; + } + + targets->stack_slots.push_back(target_slot); + if (needs_decompression == kNeedsDecompression) { + targets->needs_decompression = kNeedsDecompression; } } @@ -258,7 +326,7 @@ class ParallelMoveResolver { return std::exchange(moves_from_register_[source_reg.code()], GapMoveTargets{}); } - GapMoveTargets PopTargets(uint32_t source_slot) { + GapMoveTargets PopTargets(int32_t source_slot) { auto handle = moves_from_stack_slot_.extract(source_slot); if (handle.empty()) return {}; DCHECK(!handle.mapped().is_empty()); @@ -286,10 +354,10 @@ class ParallelMoveResolver { // chain start. if (has_cycle) { if (!scratch_has_cycle_start_) { - Pop(kScratchRegT); + Pop(scratch_); scratch_has_cycle_start_ = true; } - EmitMovesFromSource(kScratchRegT, std::move(targets)); + EmitMovesFromSource(scratch_, std::move(targets)); scratch_has_cycle_start_ = false; __ RecordComment("-- * End of cycle"); } else { @@ -306,10 +374,10 @@ class ParallelMoveResolver { if (chain_start == source) { __ RecordComment("-- * Cycle"); DCHECK(!scratch_has_cycle_start_); - if constexpr (std::is_same_v<ChainStartT, uint32_t>) { - EmitStackMove(kScratchRegT, chain_start); + if constexpr (std::is_same_v<ChainStartT, int32_t>) { + __ Move(scratch_, StackSlot{chain_start}); } else { - __ Move(kScratchRegT, chain_start); + __ Move(scratch_, chain_start); } scratch_has_cycle_start_ = true; return true; @@ -338,7 +406,7 @@ class ParallelMoveResolver { for (auto target : targets.registers) { has_cycle |= ContinueEmitMoveChain(chain_start, target); } - for (uint32_t target_slot : targets.stack_slots) { + for (int32_t target_slot : targets.stack_slots) { has_cycle |= ContinueEmitMoveChain(chain_start, target_slot); } return has_cycle; @@ -346,18 +414,23 @@ class ParallelMoveResolver { void EmitMovesFromSource(RegisterT source_reg, GapMoveTargets&& targets) { DCHECK(moves_from_register_[source_reg.code()].is_empty()); + if constexpr (DecompressIfNeeded) { + if (targets.needs_decompression == kNeedsDecompression) { + __ DecompressTagged(source_reg, source_reg); + } + } for (RegisterT target_reg : targets.registers) { DCHECK(moves_from_register_[target_reg.code()].is_empty()); __ Move(target_reg, source_reg); } - for (uint32_t target_slot : targets.stack_slots) { + for (int32_t target_slot : targets.stack_slots) { DCHECK_EQ(moves_from_stack_slot_.find(target_slot), moves_from_stack_slot_.end()); - EmitStackMove(target_slot, source_reg); + __ Move(StackSlot{target_slot}, source_reg); } } - void EmitMovesFromSource(uint32_t source_slot, GapMoveTargets&& targets) { + void EmitMovesFromSource(int32_t source_slot, GapMoveTargets&& targets) { DCHECK_EQ(moves_from_stack_slot_.find(source_slot), moves_from_stack_slot_.end()); @@ -372,51 +445,35 @@ class ParallelMoveResolver { // Otherwise, cache the slot value on the scratch register, clobbering it // if necessary. if (scratch_has_cycle_start_) { - Push(kScratchRegT); + Push(scratch_); scratch_has_cycle_start_ = false; } - register_with_slot_value = kScratchRegT; + register_with_slot_value = scratch_; } - // Now emit moves from that cached register instead of from the stack slot. DCHECK(register_with_slot_value.is_valid()); DCHECK(moves_from_register_[register_with_slot_value.code()].is_empty()); - EmitStackMove(register_with_slot_value, source_slot); + __ Move(register_with_slot_value, StackSlot{source_slot}); + // Decompress after the first move, subsequent moves reuse this register so + // they're guaranteed to be decompressed. + if constexpr (DecompressIfNeeded) { + if (targets.needs_decompression == kNeedsDecompression) { + __ DecompressTagged(register_with_slot_value, register_with_slot_value); + targets.needs_decompression = kDoesNotNeedDecompression; + } + } EmitMovesFromSource(register_with_slot_value, std::move(targets)); } - // The slot index used for representing slots in the move graph is the offset - // from the frame pointer. These helpers help translate this into an actual - // machine move. - void EmitStackMove(uint32_t target_slot, Register source_reg) { - __ movq(MemOperand(rbp, target_slot), source_reg); - } - void EmitStackMove(uint32_t target_slot, DoubleRegister source_reg) { - __ Movsd(MemOperand(rbp, target_slot), source_reg); - } - void EmitStackMove(Register target_reg, uint32_t source_slot) { - __ movq(target_reg, MemOperand(rbp, source_slot)); - } - void EmitStackMove(DoubleRegister target_reg, uint32_t source_slot) { - __ Movsd(target_reg, MemOperand(rbp, source_slot)); - } - void Push(Register reg) { __ Push(reg); } void Push(DoubleRegister reg) { __ PushAll({reg}); } - void Push(uint32_t stack_slot) { - __ movq(kScratchRegister, MemOperand(rbp, stack_slot)); - __ movq(MemOperand(rsp, -1), kScratchRegister); - } void Pop(Register reg) { __ Pop(reg); } void Pop(DoubleRegister reg) { __ PopAll({reg}); } - void Pop(uint32_t stack_slot) { - __ movq(kScratchRegister, MemOperand(rsp, -1)); - __ movq(MemOperand(rbp, stack_slot), kScratchRegister); - } - MacroAssembler* masm() const { return masm_; } + MaglevAssembler* masm() const { return masm_; } MaglevAssembler* const masm_; + RegisterT scratch_; // Keep moves to/from registers and stack slots separate -- there are a fixed // number of registers but an infinite number of stack slots, so the register @@ -427,15 +484,16 @@ class ParallelMoveResolver { std::array<GapMoveTargets, RegisterT::kNumRegisters> moves_from_register_ = {}; + // TODO(victorgomes): Use MaglevAssembler::StackSlot instead of int32_t. // moves_from_stack_slot_[source] = target. - std::unordered_map<uint32_t, GapMoveTargets> moves_from_stack_slot_; + std::unordered_map<int32_t, GapMoveTargets> moves_from_stack_slot_; // materializing_register_moves[target] = node. std::array<ValueNode*, RegisterT::kNumRegisters> materializing_register_moves_ = {}; // materializing_stack_slot_moves = {(node,target), ... }. - std::vector<std::pair<uint32_t, ValueNode*>> materializing_stack_slot_moves_; + std::vector<std::pair<int32_t, ValueNode*>> materializing_stack_slot_moves_; bool scratch_has_cycle_start_ = false; }; @@ -486,35 +544,46 @@ class ExceptionHandlerTrampolineBuilder { // values are tagged and b) the stack walk treats unknown stack slots as // tagged. - const InterpretedDeoptFrame& lazy_frame = - deopt_info->top_frame().type() == - DeoptFrame::FrameType::kBuiltinContinuationFrame - ? deopt_info->top_frame().parent()->as_interpreted() - : deopt_info->top_frame().as_interpreted(); + // TODO(victorgomes): Update this once we support exceptions in inlined + // functions. Currently, only the bottom frame can contain a catch block. + const DeoptFrame* bottom_frame = &deopt_info->top_frame(); + while (bottom_frame->parent() != nullptr) { + bottom_frame = bottom_frame->parent(); + } + const InterpretedDeoptFrame& lazy_frame = bottom_frame->as_interpreted(); // TODO(v8:7700): Handle inlining. - - ParallelMoveResolver<Register> direct_moves(masm_); + ParallelMoveResolver<Register, true> direct_moves(masm_); MoveVector materialising_moves; bool save_accumulator = false; RecordMoves(lazy_frame.unit(), catch_block, lazy_frame.frame_state(), &direct_moves, &materialising_moves, &save_accumulator); - - __ bind(&handler_info->trampoline_entry); + __ BindJumpTarget(&handler_info->trampoline_entry); __ RecordComment("-- Exception handler trampoline START"); EmitMaterialisationsAndPushResults(materialising_moves, save_accumulator); + __ RecordComment("EmitMoves"); - direct_moves.EmitMoves(); - EmitPopMaterialisedResults(materialising_moves, save_accumulator); - __ jmp(catch_block->label()); +// TODO(victorgomes): Add a scratch register scope to MaglevAssembler and +// remove this arch depedent code. +#ifdef V8_TARGET_ARCH_ARM64 + UseScratchRegisterScope temps(masm_); + Register scratch = temps.AcquireX(); +#elif V8_TARGET_ARCH_X64 + Register scratch = kScratchRegister; +#else +#error "Maglev does not supported this architecture." +#endif + direct_moves.EmitMoves(scratch); + EmitPopMaterialisedResults(materialising_moves, save_accumulator, scratch); + __ Jump(catch_block->label()); __ RecordComment("-- Exception handler trampoline END"); } - MacroAssembler* masm() const { return masm_; } + MaglevAssembler* masm() const { return masm_; } void RecordMoves(const MaglevCompilationUnit& unit, BasicBlock* catch_block, const CompactInterpreterFrameState* register_frame, - ParallelMoveResolver<Register>* direct_moves, + ParallelMoveResolver<Register, true>* direct_moves, MoveVector* materialising_moves, bool* save_accumulator) { for (Phi* phi : *catch_block->phis()) { DCHECK(phi->is_exception_phi()); @@ -540,22 +609,18 @@ class ExceptionHandlerTrampolineBuilder { DCHECK(!source->allocation().IsRegister()); switch (source->properties().value_representation()) { + case ValueRepresentation::kWord64: + UNREACHABLE(); case ValueRepresentation::kTagged: direct_moves->RecordMove( source, source->allocation(), - compiler::AllocatedOperand::cast(target.operand())); + compiler::AllocatedOperand::cast(target.operand()), + phi->decompresses_tagged_result() ? kNeedsDecompression + : kDoesNotNeedDecompression); break; case ValueRepresentation::kInt32: - if (source->allocation().IsConstant()) { - // TODO(jgruber): Why is it okay for Int32 constants to remain - // untagged while non-constants are unconditionally smi-tagged or - // converted to a HeapNumber during materialisation? - direct_moves->RecordMove( - source, source->allocation(), - compiler::AllocatedOperand::cast(target.operand())); - } else { - materialising_moves->emplace_back(target, source); - } + case ValueRepresentation::kUint32: + materialising_moves->emplace_back(target, source); break; case ValueRepresentation::kFloat64: materialising_moves->emplace_back(target, source); @@ -582,73 +647,37 @@ class ExceptionHandlerTrampolineBuilder { // talking about a presumably infrequent case for exception handlers. __ RecordComment("EmitMaterialisationsAndPushResults"); + if (save_accumulator) __ Push(kReturnRegister0); for (const Move& move : moves) { - MaterialiseTo(move.source, kReturnRegister0); + // We consider constants after all other operations, since constants + // don't need to call NewHeapNumber. + if (IsConstantNode(move.source->opcode())) continue; + __ MaterialiseValueNode(kReturnRegister0, move.source); __ Push(kReturnRegister0); } } void EmitPopMaterialisedResults(const MoveVector& moves, - bool save_accumulator) const { + bool save_accumulator, + Register scratch) const { if (moves.size() == 0) return; __ RecordComment("EmitPopMaterialisedResults"); - for (auto it = moves.rbegin(); it < moves.rend(); it++) { - const ValueLocation& target = it->target; - if (target.operand().IsRegister()) { - __ Pop(target.AssignedGeneralRegister()); + for (const Move& move : base::Reversed(moves)) { + const ValueLocation& target = move.target; + Register target_reg = target.operand().IsAnyRegister() + ? target.AssignedGeneralRegister() + : scratch; + if (IsConstantNode(move.source->opcode())) { + __ MaterialiseValueNode(target_reg, move.source); } else { - DCHECK(target.operand().IsStackSlot()); - __ Pop(kScratchRegister); - __ movq(masm_->ToMemOperand(target.operand()), kScratchRegister); + __ Pop(target_reg); } - } - - if (save_accumulator) __ Pop(kReturnRegister0); - } - - void MaterialiseTo(ValueNode* value, Register dst) const { - using D = NewHeapNumberDescriptor; - switch (value->properties().value_representation()) { - case ValueRepresentation::kInt32: { - // We consider Int32Constants together with tagged values. - DCHECK(!value->allocation().IsConstant()); - Label done; - __ movq(dst, ToMemOperand(value)); - __ addl(dst, dst); - __ j(no_overflow, &done); - // If we overflow, instead of bailing out (deopting), we change - // representation to a HeapNumber. - __ Cvtlsi2sd(D::GetDoubleRegisterParameter(D::kValue), - ToMemOperand(value)); - __ CallBuiltin(Builtin::kNewHeapNumber); - __ Move(dst, kReturnRegister0); - __ bind(&done); - break; + if (target_reg == scratch) { + __ Move(masm_->ToMemOperand(target.operand()), scratch); } - case ValueRepresentation::kFloat64: - if (Float64Constant* constant = value->TryCast<Float64Constant>()) { - __ Move(D::GetDoubleRegisterParameter(D::kValue), constant->value()); - } else { - __ Movsd(D::GetDoubleRegisterParameter(D::kValue), - ToMemOperand(value)); - } - __ CallBuiltin(Builtin::kNewHeapNumber); - __ Move(dst, kReturnRegister0); - break; - case ValueRepresentation::kTagged: - UNREACHABLE(); } - } - - MemOperand ToMemOperand(ValueNode* node) const { - DCHECK(node->allocation().IsAnyStackSlot()); - return masm_->ToMemOperand(node->allocation()); - } - - MemOperand ToMemOperand(const ValueLocation& location) const { - DCHECK(location.operand().IsStackSlot()); - return masm_->ToMemOperand(location.operand()); + if (save_accumulator) __ Pop(kReturnRegister0); } MaglevAssembler* const masm_; @@ -660,155 +689,33 @@ class MaglevCodeGeneratingNodeProcessor { : masm_(masm) {} void PreProcessGraph(Graph* graph) { + // TODO(victorgomes): I wonder if we want to create a struct that shares + // these fields between graph and code_gen_state. code_gen_state()->set_untagged_slots(graph->untagged_stack_slots()); code_gen_state()->set_tagged_slots(graph->tagged_stack_slots()); + code_gen_state()->set_max_deopted_stack_size( + graph->max_deopted_stack_size()); + code_gen_state()->set_max_call_stack_args_(graph->max_call_stack_args()); if (v8_flags.maglev_break_on_entry) { - __ int3(); + __ DebugBreak(); } - if (v8_flags.maglev_ool_prologue) { - // Call the out-of-line prologue (with parameters passed on the stack). - __ Push(Immediate(code_gen_state()->stack_slots() * kSystemPointerSize)); - __ Push(Immediate(code_gen_state()->tagged_slots() * kSystemPointerSize)); - __ CallBuiltin(Builtin::kMaglevOutOfLinePrologue); - } else { - __ BailoutIfDeoptimized(rbx); - - // Tiering support. - // TODO(jgruber): Extract to a builtin (the tiering prologue is ~230 bytes - // per Maglev code object on x64). - { - // Scratch registers. Don't clobber regs related to the calling - // convention (e.g. kJavaScriptCallArgCountRegister). Keep up-to-date - // with deferred flags code. - Register flags = rcx; - Register feedback_vector = r9; - - // Load the feedback vector. - __ LoadTaggedPointerField( - feedback_vector, - FieldOperand(kJSFunctionRegister, JSFunction::kFeedbackCellOffset)); - __ LoadTaggedPointerField( - feedback_vector, FieldOperand(feedback_vector, Cell::kValueOffset)); - __ AssertFeedbackVector(feedback_vector); - - __ LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing( - flags, feedback_vector, CodeKind::MAGLEV, - &deferred_flags_need_processing_); - } - - __ EnterFrame(StackFrame::MAGLEV); - - // Save arguments in frame. - // TODO(leszeks): Consider eliding this frame if we don't make any calls - // that could clobber these registers. - __ Push(kContextRegister); - __ Push(kJSFunctionRegister); // Callee's JS function. - __ Push(kJavaScriptCallArgCountRegister); // Actual argument count. - - { - ASM_CODE_COMMENT_STRING(masm(), " Stack/interrupt check"); - // Stack check. This folds the checks for both the interrupt stack limit - // check and the real stack limit into one by just checking for the - // interrupt limit. The interrupt limit is either equal to the real - // stack limit or tighter. By ensuring we have space until that limit - // after building the frame we can quickly precheck both at once. - __ Move(kScratchRegister, rsp); - // TODO(leszeks): Include a max call argument size here. - __ subq(kScratchRegister, Immediate(code_gen_state()->stack_slots() * - kSystemPointerSize)); - __ cmpq(kScratchRegister, - __ StackLimitAsOperand(StackLimitKind::kInterruptStackLimit)); - - __ j(below, &deferred_call_stack_guard_); - __ bind(&deferred_call_stack_guard_return_); - } - - // Initialize stack slots. - if (graph->tagged_stack_slots() > 0) { - ASM_CODE_COMMENT_STRING(masm(), "Initializing stack slots"); - // TODO(leszeks): Consider filling with xmm + movdqa instead. - __ Move(rax, Immediate(0)); - - // Magic value. Experimentally, an unroll size of 8 doesn't seem any - // worse than fully unrolled pushes. - const int kLoopUnrollSize = 8; - int tagged_slots = graph->tagged_stack_slots(); - if (tagged_slots < 2 * kLoopUnrollSize) { - // If the frame is small enough, just unroll the frame fill - // completely. - for (int i = 0; i < tagged_slots; ++i) { - __ pushq(rax); - } - } else { - // Extract the first few slots to round to the unroll size. - int first_slots = tagged_slots % kLoopUnrollSize; - for (int i = 0; i < first_slots; ++i) { - __ pushq(rax); - } - __ Move(rbx, Immediate(tagged_slots / kLoopUnrollSize)); - // We enter the loop unconditionally, so make sure we need to loop at - // least once. - DCHECK_GT(tagged_slots / kLoopUnrollSize, 0); - Label loop; - __ bind(&loop); - for (int i = 0; i < kLoopUnrollSize; ++i) { - __ pushq(rax); - } - __ decl(rbx); - __ j(greater, &loop); - } - } - if (graph->untagged_stack_slots() > 0) { - // Extend rsp by the size of the remaining untagged part of the frame, - // no need to initialise these. - __ subq(rsp, - Immediate(graph->untagged_stack_slots() * kSystemPointerSize)); - } - } + __ Prologue(graph); } - void PostProcessGraph(Graph*) { - __ int3(); - - if (!v8_flags.maglev_ool_prologue) { - __ bind(&deferred_call_stack_guard_); - { - ASM_CODE_COMMENT_STRING(masm(), "Stack/interrupt call"); - // Save any registers that can be referenced by RegisterInput. - // TODO(leszeks): Only push those that are used by the graph. - __ PushAll(RegisterInput::kAllowedRegisters); - // Push the frame size - __ Push(Immediate(Smi::FromInt(code_gen_state()->stack_slots() * - kSystemPointerSize))); - __ CallRuntime(Runtime::kStackGuardWithGap, 1); - __ PopAll(RegisterInput::kAllowedRegisters); - __ jmp(&deferred_call_stack_guard_return_); - } - - __ bind(&deferred_flags_need_processing_); - { - ASM_CODE_COMMENT_STRING(masm(), "Optimized marker check"); - // See PreProcessGraph. - Register flags = rcx; - Register feedback_vector = r9; - // TODO(leszeks): This could definitely be a builtin that we tail-call. - __ OptimizeCodeOrTailCallOptimizedCodeSlot( - flags, feedback_vector, kJSFunctionRegister, JumpMode::kJump); - __ Trap(); - } - } - } + void PostProcessGraph(Graph* graph) {} void PreProcessBasicBlock(BasicBlock* block) { + if (block->is_loop()) { + __ LoopHeaderAlign(); + } if (v8_flags.code_comments) { std::stringstream ss; ss << "-- Block b" << graph_labeller()->BlockId(block); __ RecordComment(ss.str()); } - - __ bind(block->label()); + __ BindBlock(block); } template <typename NodeT> @@ -820,13 +727,8 @@ class MaglevCodeGeneratingNodeProcessor { __ RecordComment(ss.str()); } - if (v8_flags.debug_code) { - __ movq(kScratchRegister, rbp); - __ subq(kScratchRegister, rsp); - __ cmpq(kScratchRegister, - Immediate(code_gen_state()->stack_slots() * kSystemPointerSize + - StandardFrameConstants::kFixedFrameSizeFromFp)); - __ Assert(equal, AbortReason::kStackAccessBelowStackPointer); + if (v8_flags.maglev_assert_stack_size) { + __ AssertStackSizeCorrect(); } // Emit Phi moves before visiting the control node. @@ -835,22 +737,47 @@ class MaglevCodeGeneratingNodeProcessor { state); } + if (v8_flags.debug_code && !std::is_same_v<NodeT, Phi>) { + // Check that all int32/uint32 inputs are zero extended. + // Note that we don't do this for Phis, since they are virtual operations + // whose inputs aren't actual inputs but are injected on incoming + // branches. There's thus nothing to verify for the inputs we see for the + // phi. + for (Input& input : *node) { + ValueRepresentation rep = + input.node()->properties().value_representation(); + if (rep == ValueRepresentation::kInt32 || + rep == ValueRepresentation::kUint32) { + // TODO(leszeks): Ideally we'd check non-register inputs too, but + // AssertZeroExtended needs the scratch register, so we'd have to do + // some manual push/pop here to free up another register. + if (input.IsGeneralRegister()) { + __ AssertZeroExtended(ToRegister(input)); + } + } + } + } + + MaglevAssembler::ScratchRegisterScope scratch_scope(masm()); + scratch_scope.Include(node->general_temporaries()); + scratch_scope.IncludeDouble(node->double_temporaries()); + node->GenerateCode(masm(), state); if (std::is_base_of<ValueNode, NodeT>::value) { ValueNode* value_node = node->template Cast<ValueNode>(); - if (value_node->is_spilled()) { + if (value_node->has_valid_live_range() && value_node->is_spilled()) { compiler::AllocatedOperand source = compiler::AllocatedOperand::cast(value_node->result().operand()); // We shouldn't spill nodes which already output to the stack. if (!source.IsAnyStackSlot()) { if (v8_flags.code_comments) __ RecordComment("-- Spill:"); if (source.IsRegister()) { - __ movq(masm()->GetStackSlot(value_node->spill_slot()), + __ Move(masm()->GetStackSlot(value_node->spill_slot()), ToRegister(source)); } else { - __ Movsd(masm()->GetStackSlot(value_node->spill_slot()), - ToDoubleRegister(source)); + __ Move(masm()->GetStackSlot(value_node->spill_slot()), + ToDoubleRegister(source)); } } else { // Otherwise, the result source stack slot should be equal to the @@ -871,14 +798,28 @@ class MaglevCodeGeneratingNodeProcessor { int predecessor_id = state.block()->predecessor_id(); +// TODO(victorgomes): Add a scratch register scope to MaglevAssembler and +// remove this arch depedent code. +#ifdef V8_TARGET_ARCH_ARM64 + UseScratchRegisterScope temps(masm_); + Register scratch = temps.AcquireX(); + DoubleRegister double_scratch = temps.AcquireD(); +#elif V8_TARGET_ARCH_X64 + Register scratch = kScratchRegister; + DoubleRegister double_scratch = kScratchDoubleReg; +#else +#error "Maglev does not supported this architecture." +#endif + // TODO(leszeks): Move these to fields, to allow their data structure // allocations to be reused. Will need some sort of state resetting. - ParallelMoveResolver<Register> register_moves(masm_); - ParallelMoveResolver<DoubleRegister> double_register_moves(masm_); + ParallelMoveResolver<Register, false> register_moves(masm_); + ParallelMoveResolver<DoubleRegister, false> double_register_moves(masm_); // Remember what registers were assigned to by a Phi, to avoid clobbering // them with RegisterMoves. RegList registers_set_by_phis; + DoubleRegList double_registers_set_by_phis; __ RecordComment("-- Gap moves:"); @@ -910,9 +851,19 @@ class MaglevCodeGeneratingNodeProcessor { << graph_labeller()->NodeId(phi) << ")"; __ RecordComment(ss.str()); } - register_moves.RecordMove(node, source, target); + if (phi->value_representation() == ValueRepresentation::kFloat64) { + DCHECK(!phi->decompresses_tagged_result()); + double_register_moves.RecordMove(node, source, target, false); + } else { + register_moves.RecordMove(node, source, target, + kDoesNotNeedDecompression); + } if (target.IsAnyRegister()) { - registers_set_by_phis.set(target.GetRegister()); + if (phi->value_representation() == ValueRepresentation::kFloat64) { + double_registers_set_by_phis.set(target.GetDoubleRegister()); + } else { + registers_set_by_phis.set(target.GetRegister()); + } } } } @@ -932,16 +883,20 @@ class MaglevCodeGeneratingNodeProcessor { ss << "-- * " << source << " → " << reg; __ RecordComment(ss.str()); } - register_moves.RecordMove(node, source, reg); + register_moves.RecordMove(node, source, reg, + kDoesNotNeedDecompression); } }); - register_moves.EmitMoves(); + register_moves.EmitMoves(scratch); __ RecordComment("-- Double gap moves:"); target->state()->register_state().ForEachDoubleRegister( [&](DoubleRegister reg, RegisterState& state) { + // Don't clobber registers set by a Phi. + if (double_registers_set_by_phis.has(reg)) return; + ValueNode* node; RegisterMerge* merge; if (LoadMergeState(state, &node, &merge)) { @@ -952,11 +907,12 @@ class MaglevCodeGeneratingNodeProcessor { ss << "-- * " << source << " → " << reg; __ RecordComment(ss.str()); } - double_register_moves.RecordMove(node, source, reg); + double_register_moves.RecordMove(node, source, reg, + kDoesNotNeedDecompression); } }); - double_register_moves.EmitMoves(); + double_register_moves.EmitMoves(double_scratch); } Isolate* isolate() const { return masm_->isolate(); } @@ -970,9 +926,6 @@ class MaglevCodeGeneratingNodeProcessor { private: MaglevAssembler* const masm_; - Label deferred_call_stack_guard_; - Label deferred_call_stack_guard_return_; - Label deferred_flags_need_processing_; }; class SafepointingNodeProcessor { @@ -992,18 +945,31 @@ class SafepointingNodeProcessor { }; namespace { -int GetFrameCount(const DeoptFrame& deopt_frame) { - switch (deopt_frame.type()) { - case DeoptFrame::FrameType::kInterpretedFrame: - return 1 + deopt_frame.as_interpreted().unit().inlining_depth(); - case DeoptFrame::FrameType::kBuiltinContinuationFrame: - return 1 + GetFrameCount(*deopt_frame.parent()); +struct FrameCount { + int total; + int js_frame; +}; + +FrameCount GetFrameCount(const DeoptFrame* deopt_frame) { + int total = 1; + int js_frame = 1; + while (deopt_frame->parent()) { + deopt_frame = deopt_frame->parent(); + if (deopt_frame->type() != DeoptFrame::FrameType::kInlinedArgumentsFrame) { + js_frame++; + } + total++; } + return FrameCount{total, js_frame}; } + BytecodeOffset GetBytecodeOffset(const DeoptFrame& deopt_frame) { switch (deopt_frame.type()) { case DeoptFrame::FrameType::kInterpretedFrame: return deopt_frame.as_interpreted().bytecode_position(); + case DeoptFrame::FrameType::kInlinedArgumentsFrame: + DCHECK_NOT_NULL(deopt_frame.parent()); + return GetBytecodeOffset(*deopt_frame.parent()); case DeoptFrame::FrameType::kBuiltinContinuationFrame: return Builtins::GetContinuationBytecodeOffset( deopt_frame.as_builtin_continuation().builtin_id()); @@ -1013,10 +979,24 @@ SourcePosition GetSourcePosition(const DeoptFrame& deopt_frame) { switch (deopt_frame.type()) { case DeoptFrame::FrameType::kInterpretedFrame: return deopt_frame.as_interpreted().source_position(); + case DeoptFrame::FrameType::kInlinedArgumentsFrame: + DCHECK_NOT_NULL(deopt_frame.parent()); + return GetSourcePosition(*deopt_frame.parent()); case DeoptFrame::FrameType::kBuiltinContinuationFrame: return SourcePosition::Unknown(); } } +compiler::SharedFunctionInfoRef GetSharedFunctionInfo( + const DeoptFrame& deopt_frame) { + switch (deopt_frame.type()) { + case DeoptFrame::FrameType::kInterpretedFrame: + return deopt_frame.as_interpreted().unit().shared_function_info(); + case DeoptFrame::FrameType::kInlinedArgumentsFrame: + return deopt_frame.as_inlined_arguments().unit().shared_function_info(); + case DeoptFrame::FrameType::kBuiltinContinuationFrame: + return GetSharedFunctionInfo(*deopt_frame.parent()); + } +} } // namespace class MaglevTranslationArrayBuilder { @@ -1031,24 +1011,32 @@ class MaglevTranslationArrayBuilder { deopt_literals_(deopt_literals) {} void BuildEagerDeopt(EagerDeoptInfo* deopt_info) { - int frame_count = GetFrameCount(deopt_info->top_frame()); - int jsframe_count = frame_count; - int update_feedback_count = 0; + auto [frame_count, jsframe_count] = GetFrameCount(&deopt_info->top_frame()); deopt_info->set_translation_index( - translation_array_builder_->BeginTranslation(frame_count, jsframe_count, - update_feedback_count)); + translation_array_builder_->BeginTranslation( + frame_count, jsframe_count, + deopt_info->feedback_to_update().IsValid())); + if (deopt_info->feedback_to_update().IsValid()) { + translation_array_builder_->AddUpdateFeedback( + GetDeoptLiteral(*deopt_info->feedback_to_update().vector), + deopt_info->feedback_to_update().index()); + } const InputLocation* current_input_location = deopt_info->input_locations(); BuildDeoptFrame(deopt_info->top_frame(), current_input_location); } void BuildLazyDeopt(LazyDeoptInfo* deopt_info) { - int frame_count = GetFrameCount(deopt_info->top_frame()); - int jsframe_count = frame_count; - int update_feedback_count = 0; + auto [frame_count, jsframe_count] = GetFrameCount(&deopt_info->top_frame()); deopt_info->set_translation_index( - translation_array_builder_->BeginTranslation(frame_count, jsframe_count, - update_feedback_count)); + translation_array_builder_->BeginTranslation( + frame_count, jsframe_count, + deopt_info->feedback_to_update().IsValid())); + if (deopt_info->feedback_to_update().IsValid()) { + translation_array_builder_->AddUpdateFeedback( + GetDeoptLiteral(*deopt_info->feedback_to_update().vector), + deopt_info->feedback_to_update().index()); + } const InputLocation* current_input_location = deopt_info->input_locations(); @@ -1090,8 +1078,7 @@ class MaglevTranslationArrayBuilder { } translation_array_builder_->BeginInterpretedFrame( interpreted_frame.bytecode_position(), - GetDeoptLiteral( - *interpreted_frame.unit().shared_function_info().object()), + GetDeoptLiteral(GetSharedFunctionInfo(interpreted_frame)), interpreted_frame.unit().register_count(), return_offset, deopt_info->result_size()); @@ -1101,6 +1088,9 @@ class MaglevTranslationArrayBuilder { deopt_info->result_size()); break; } + case DeoptFrame::FrameType::kInlinedArgumentsFrame: + // The inlined arguments frame can never be the top frame. + UNREACHABLE(); case DeoptFrame::FrameType::kBuiltinContinuationFrame: { const BuiltinContinuationDeoptFrame& builtin_continuation_frame = top_frame.as_builtin_continuation(); @@ -1108,11 +1098,7 @@ class MaglevTranslationArrayBuilder { translation_array_builder_->BeginBuiltinContinuationFrame( Builtins::GetContinuationBytecodeOffset( builtin_continuation_frame.builtin_id()), - GetDeoptLiteral(*builtin_continuation_frame.parent() - ->as_interpreted() - .unit() - .shared_function_info() - .object()), + GetDeoptLiteral(GetSharedFunctionInfo(builtin_continuation_frame)), builtin_continuation_frame.parameters().length()); // Closure @@ -1168,8 +1154,7 @@ class MaglevTranslationArrayBuilder { const int return_count = 0; translation_array_builder_->BeginInterpretedFrame( interpreted_frame.bytecode_position(), - GetDeoptLiteral( - *interpreted_frame.unit().shared_function_info().object()), + GetDeoptLiteral(GetSharedFunctionInfo(interpreted_frame)), interpreted_frame.unit().register_count(), return_offset, return_count); @@ -1179,6 +1164,28 @@ class MaglevTranslationArrayBuilder { return_count); break; } + case DeoptFrame::FrameType::kInlinedArgumentsFrame: { + const InlinedArgumentsDeoptFrame& inlined_arguments_frame = + frame.as_inlined_arguments(); + + translation_array_builder_->BeginInlinedExtraArguments( + GetDeoptLiteral(GetSharedFunctionInfo(inlined_arguments_frame)), + static_cast<uint32_t>(inlined_arguments_frame.arguments().size())); + + // Closure + translation_array_builder_->StoreLiteral( + GetDeoptLiteral(inlined_arguments_frame.unit().function())); + + // Arguments + // TODO(victorgomes): Technically we don't need all arguments, only the + // extra ones. But doing this at the moment, since it matches the + // TurboFan behaviour. + for (ValueNode* value : inlined_arguments_frame.arguments()) { + BuildDeoptFrameSingleValue(value, *current_input_location); + current_input_location++; + } + break; + } case DeoptFrame::FrameType::kBuiltinContinuationFrame: { const BuiltinContinuationDeoptFrame& builtin_continuation_frame = frame.as_builtin_continuation(); @@ -1186,11 +1193,7 @@ class MaglevTranslationArrayBuilder { translation_array_builder_->BeginBuiltinContinuationFrame( Builtins::GetContinuationBytecodeOffset( builtin_continuation_frame.builtin_id()), - GetDeoptLiteral(*builtin_continuation_frame.parent() - ->as_interpreted() - .unit() - .shared_function_info() - .object()), + GetDeoptLiteral(GetSharedFunctionInfo(builtin_continuation_frame)), builtin_continuation_frame.parameters().length()); // Closure @@ -1215,12 +1218,17 @@ class MaglevTranslationArrayBuilder { void BuildDeoptStoreRegister(const compiler::AllocatedOperand& operand, ValueRepresentation repr) { switch (repr) { + case ValueRepresentation::kWord64: + UNREACHABLE(); case ValueRepresentation::kTagged: translation_array_builder_->StoreRegister(operand.GetRegister()); break; case ValueRepresentation::kInt32: translation_array_builder_->StoreInt32Register(operand.GetRegister()); break; + case ValueRepresentation::kUint32: + translation_array_builder_->StoreUint32Register(operand.GetRegister()); + break; case ValueRepresentation::kFloat64: translation_array_builder_->StoreDoubleRegister( operand.GetDoubleRegister()); @@ -1232,12 +1240,17 @@ class MaglevTranslationArrayBuilder { ValueRepresentation repr) { int stack_slot = DeoptStackSlotFromStackSlot(operand); switch (repr) { + case ValueRepresentation::kWord64: + UNREACHABLE(); case ValueRepresentation::kTagged: translation_array_builder_->StoreStackSlot(stack_slot); break; case ValueRepresentation::kInt32: translation_array_builder_->StoreInt32StackSlot(stack_slot); break; + case ValueRepresentation::kUint32: + translation_array_builder_->StoreUint32StackSlot(stack_slot); + break; case ValueRepresentation::kFloat64: translation_array_builder_->StoreDoubleStackSlot(stack_slot); break; @@ -1273,7 +1286,7 @@ class MaglevTranslationArrayBuilder { translation_array_builder_->StoreStackSlot(closure_index); } else { translation_array_builder_->StoreLiteral( - GetDeoptLiteral(*compilation_unit.function().object())); + GetDeoptLiteral(compilation_unit.function())); } // TODO(leszeks): The input locations array happens to be in the same order @@ -1346,6 +1359,10 @@ class MaglevTranslationArrayBuilder { return *res.entry; } + int GetDeoptLiteral(compiler::HeapObjectRef ref) { + return GetDeoptLiteral(*ref.object()); + } + LocalIsolate* local_isolate_; MaglevAssembler* masm_; TranslationArrayBuilder* translation_array_builder_; @@ -1381,10 +1398,29 @@ void MaglevCodeGenerator::EmitCode() { MaglevCodeGeneratingNodeProcessor>> processor(SafepointingNodeProcessor{local_isolate_}, MaglevCodeGeneratingNodeProcessor{masm()}); + RecordInlinedFunctions(); processor.ProcessGraph(graph_); EmitDeferredCode(); EmitDeopts(); + if (code_gen_failed_) return; EmitExceptionHandlerTrampolines(); + __ FinishCode(); +} + +void MaglevCodeGenerator::RecordInlinedFunctions() { + // The inlined functions should be the first literals. + DCHECK_EQ(0u, deopt_literals_.size()); + for (OptimizedCompilationInfo::InlinedFunctionHolder& inlined : + graph_->inlined_functions()) { + IdentityMapFindResult<int> res = + deopt_literals_.FindOrInsert(inlined.shared_info); + if (!res.already_exists) { + DCHECK_EQ(0, *res.entry); + *res.entry = deopt_literals_.size() - 1; + } + inlined.RegisterInlinedFunctionId(*res.entry); + } + inlined_function_count_ = static_cast<int>(deopt_literals_.size()); } void MaglevCodeGenerator::EmitDeferredCode() { @@ -1401,9 +1437,28 @@ void MaglevCodeGenerator::EmitDeferredCode() { } void MaglevCodeGenerator::EmitDeopts() { + const size_t num_deopts = code_gen_state_.eager_deopts().size() + + code_gen_state_.lazy_deopts().size(); + if (num_deopts > Deoptimizer::kMaxNumberOfEntries) { + code_gen_failed_ = true; + return; + } + MaglevTranslationArrayBuilder translation_builder( local_isolate_, &masm_, &translation_array_builder_, &deopt_literals_); + // Deoptimization exits must be as small as possible, since their count grows + // with function size. These labels are an optimization which extracts the + // (potentially large) instruction sequence for the final jump to the + // deoptimization entry into a single spot per InstructionStream object. All + // deopt exits can then near-call to this label. Note: not used on all + // architectures. + Label eager_deopt_entry; + Label lazy_deopt_entry; + __ MaybeEmitDeoptBuiltinsCall( + code_gen_state_.eager_deopts().size(), &eager_deopt_entry, + code_gen_state_.lazy_deopts().size(), &lazy_deopt_entry); + deopt_exit_start_offset_ = __ pc_offset(); int deopt_index = 0; @@ -1413,15 +1468,23 @@ void MaglevCodeGenerator::EmitDeopts() { local_isolate_->heap()->Safepoint(); translation_builder.BuildEagerDeopt(deopt_info); - if (masm_.compilation_info()->collect_source_positions()) { + if (masm_.compilation_info()->collect_source_positions() || + IsDeoptimizationWithoutCodeInvalidation(deopt_info->reason())) { + // Note: Maglev uses the deopt_reason to tell the deoptimizer not to + // discard optimized code on deopt during ML-TF OSR. This is why we + // unconditionally emit the deopt_reason when + // IsDeoptimizationWithoutCodeInvalidation is true. __ RecordDeoptReason(deopt_info->reason(), 0, GetSourcePosition(deopt_info->top_frame()), deopt_index); } __ bind(deopt_info->deopt_entry_label()); + __ CallForDeoptimization(Builtin::kDeoptimizationEntry_Eager, deopt_index, deopt_info->deopt_entry_label(), - DeoptimizeKind::kEager, nullptr, nullptr); + DeoptimizeKind::kEager, nullptr, + &eager_deopt_entry); + deopt_index++; } @@ -1436,10 +1499,11 @@ void MaglevCodeGenerator::EmitDeopts() { GetSourcePosition(deopt_info->top_frame()), deopt_index); } - __ bind(deopt_info->deopt_entry_label()); + __ BindExceptionHandler(deopt_info->deopt_entry_label()); + __ CallForDeoptimization(Builtin::kDeoptimizationEntry_Lazy, deopt_index, deopt_info->deopt_entry_label(), - DeoptimizeKind::kLazy, nullptr, nullptr); + DeoptimizeKind::kLazy, nullptr, &lazy_deopt_entry); last_updated_safepoint = safepoint_table_builder_.UpdateDeoptimizationInfo( deopt_info->deopting_call_return_pc(), @@ -1459,7 +1523,7 @@ void MaglevCodeGenerator::EmitExceptionHandlerTrampolines() { void MaglevCodeGenerator::EmitMetadata() { // Final alignment before starting on the metadata section. - masm()->Align(Code::kMetadataAlignment); + masm()->Align(InstructionStream::kMetadataAlignment); safepoint_table_builder_.Emit(masm()); @@ -1473,6 +1537,8 @@ void MaglevCodeGenerator::EmitMetadata() { } MaybeHandle<Code> MaglevCodeGenerator::BuildCodeObject(Isolate* isolate) { + if (code_gen_failed_) return {}; + CodeDesc desc; masm()->GetCode(isolate, &desc, &safepoint_table_builder_, handler_table_offset_); @@ -1501,10 +1567,8 @@ Handle<DeoptimizationData> MaglevCodeGenerator::GenerateDeoptimizationData( auto raw_data = *data; raw_data.SetTranslationByteArray(*translation_array); - // TODO(leszeks): Fix with the real inlined function count. - raw_data.SetInlinedFunctionCount(Smi::zero()); - // TODO(leszeks): Support optimization IDs - raw_data.SetOptimizationId(Smi::zero()); + raw_data.SetInlinedFunctionCount(Smi::FromInt(inlined_function_count_)); + raw_data.SetOptimizationId(Smi::FromInt(isolate->NextOptimizationId())); DCHECK_NE(deopt_exit_start_offset_, -1); raw_data.SetDeoptExitStart(Smi::FromInt(deopt_exit_start_offset_)); @@ -1520,9 +1584,14 @@ Handle<DeoptimizationData> MaglevCodeGenerator::GenerateDeoptimizationData( Handle<DeoptimizationLiteralArray> literals = isolate->factory()->NewDeoptimizationLiteralArray(deopt_literals_.size() + 1); - // TODO(leszeks): Fix with the real inlining positions. + int inlined_functions_size = + static_cast<int>(graph_->inlined_functions().size()); Handle<PodArray<InliningPosition>> inlining_positions = - PodArray<InliningPosition>::New(isolate, 0); + PodArray<InliningPosition>::New(isolate, inlined_functions_size); + for (int i = 0; i < inlined_functions_size; ++i) { + inlining_positions->set(i, graph_->inlined_functions()[i].position); + } + DisallowGarbageCollection no_gc; auto raw_literals = *literals; @@ -1539,8 +1608,6 @@ Handle<DeoptimizationData> MaglevCodeGenerator::GenerateDeoptimizationData( ->bytecode() .object()); raw_data.SetLiteralArray(raw_literals); - - // TODO(leszeks): Fix with the real inlining positions. raw_data.SetInliningPositions(*inlining_positions); // TODO(leszeks): Fix once we have OSR. |