1 files changed, 423 insertions, 356 deletions
diff --git a/deps/v8/src/maglev/maglev-code-generator.cc b/deps/v8/src/maglev/maglev-code-generator.cc
index a20fd22110..1c221459f7 100644
--- a/deps/v8/src/maglev/maglev-code-generator.cc
+++ b/deps/v8/src/maglev/maglev-code-generator.cc
@@ -17,6 +17,7 @@
 #include "src/common/globals.h"
 #include "src/compiler/backend/instruction.h"
 #include "src/deoptimizer/deoptimize-reason.h"
+#include "src/deoptimizer/deoptimizer.h"
 #include "src/deoptimizer/translation-array.h"
 #include "src/execution/frame-constants.h"
 #include "src/interpreter/bytecode-register.h"
@@ -44,16 +45,16 @@ template <typename RegisterT>
 struct RegisterTHelper;
 template <>
 struct RegisterTHelper<Register> {
-  static constexpr Register kScratch = kScratchRegister;
   static constexpr RegList kAllocatableRegisters = kAllocatableGeneralRegisters;
 };
 template <>
 struct RegisterTHelper<DoubleRegister> {
-  static constexpr DoubleRegister kScratch = kScratchDoubleReg;
   static constexpr DoubleRegList kAllocatableRegisters =
       kAllocatableDoubleRegisters;
 };
 
+enum NeedsDecompression { kDoesNotNeedDecompression, kNeedsDecompression };
+
 // The ParallelMoveResolver is used to resolve multiple moves between registers
 // and stack slots that are intended to happen, semantically, in parallel. It
 // finds chains of moves that would clobber each other, and emits them in a non
@@ -86,33 +87,39 @@ struct RegisterTHelper<DoubleRegister> {
 // It additionally keeps track of materialising moves, which don't have a stack
 // slot but rather materialise a value from, e.g., a constant. These can safely
 // be emitted at the end, once all the parallel moves are done.
-template <typename RegisterT>
+template <typename RegisterT, bool DecompressIfNeeded>
 class ParallelMoveResolver {
-  static constexpr RegisterT kScratchRegT =
-      RegisterTHelper<RegisterT>::kScratch;
-
   static constexpr auto kAllocatableRegistersT =
       RegisterTHelper<RegisterT>::kAllocatableRegisters;
+  static_assert(!DecompressIfNeeded || std::is_same_v<Register, RegisterT>);
 
  public:
-  explicit ParallelMoveResolver(MaglevAssembler* masm) : masm_(masm) {}
+  explicit ParallelMoveResolver(MaglevAssembler* masm)
+      : masm_(masm), scratch_(RegisterT::no_reg()) {}
 
   void RecordMove(ValueNode* source_node, compiler::InstructionOperand source,
-                  compiler::AllocatedOperand target) {
-    if (target.IsRegister()) {
-      RecordMoveToRegister(source_node, source, ToRegisterT<RegisterT>(target));
+                  compiler::AllocatedOperand target,
+                  bool target_needs_to_be_decompressed) {
+    if (target.IsAnyRegister()) {
+      RecordMoveToRegister(source_node, source, ToRegisterT<RegisterT>(target),
+                           target_needs_to_be_decompressed);
     } else {
       RecordMoveToStackSlot(source_node, source,
-                            masm_->GetFramePointerOffsetForStackSlot(target));
+                            masm_->GetFramePointerOffsetForStackSlot(target),
+                            target_needs_to_be_decompressed);
     }
   }
 
   void RecordMove(ValueNode* source_node, compiler::InstructionOperand source,
-                  RegisterT target_reg) {
-    RecordMoveToRegister(source_node, source, target_reg);
+                  RegisterT target_reg,
+                  NeedsDecompression target_needs_to_be_decompressed) {
+    RecordMoveToRegister(source_node, source, target_reg,
+                         target_needs_to_be_decompressed);
   }
 
-  void EmitMoves() {
+  void EmitMoves(RegisterT scratch) {
+    DCHECK(!scratch_.is_valid());
+    scratch_ = scratch;
     for (RegisterT reg : kAllocatableRegistersT) {
       StartEmitMoveChain(reg);
       ValueNode* materializing_register_move =
@@ -128,8 +135,8 @@ class ParallelMoveResolver {
       StartEmitMoveChain(moves_from_stack_slot_.begin()->first);
     }
     for (auto [stack_slot, node] : materializing_stack_slot_moves_) {
-      node->LoadToRegister(masm_, kScratchRegT);
-      EmitStackMove(stack_slot, kScratchRegT);
+      node->LoadToRegister(masm_, scratch_);
+      __ Move(StackSlot{stack_slot}, scratch_);
     }
   }
 
@@ -139,12 +146,25 @@ class ParallelMoveResolver {
   ParallelMoveResolver operator=(const ParallelMoveResolver&) = delete;
 
  private:
-  // The targets of moves from a source, i.e. the set of outgoing edges for a
-  // node in the move graph.
+  // For the GapMoveTargets::needs_decompression member when DecompressIfNeeded
+  // is false.
+  struct DummyNeedsDecompression {
+    // NOLINTNEXTLINE
+    DummyNeedsDecompression(NeedsDecompression) {}
+  };
+
+  // The targets of moves from a source, i.e. the set of outgoing edges for
+  // a node in the move graph.
   struct GapMoveTargets {
+    base::SmallVector<int32_t, 1> stack_slots = base::SmallVector<int32_t, 1>{};
     RegListBase<RegisterT> registers;
-    base::SmallVector<uint32_t, 1> stack_slots =
-        base::SmallVector<uint32_t, 1>{};
+
+    // We only need this field for DecompressIfNeeded, otherwise use an empty
+    // dummy value.
+    V8_NO_UNIQUE_ADDRESS
+    std::conditional_t<DecompressIfNeeded, NeedsDecompression,
+                       DummyNeedsDecompression>
+        needs_decompression = kDoesNotNeedDecompression;
 
     GapMoveTargets() = default;
     GapMoveTargets(GapMoveTargets&&) V8_NOEXCEPT = default;
@@ -178,11 +198,11 @@ class ParallelMoveResolver {
     }
   }
 
-  void CheckNoExistingMoveToStackSlot(uint32_t target_slot) {
-    for (Register reg : kAllocatableRegistersT) {
+  void CheckNoExistingMoveToStackSlot(int32_t target_slot) {
+    for (RegisterT reg : kAllocatableRegistersT) {
       auto& stack_slots = moves_from_register_[reg.code()].stack_slots;
       if (std::any_of(stack_slots.begin(), stack_slots.end(),
-                      [&](uint32_t slot) { return slot == target_slot; })) {
+                      [&](int32_t slot) { return slot == target_slot; })) {
         FATAL("Existing move from %s to stack slot %d", RegisterName(reg),
               target_slot);
       }
@@ -190,7 +210,7 @@ class ParallelMoveResolver {
     for (auto& [stack_slot, targets] : moves_from_stack_slot_) {
       auto& stack_slots = targets.stack_slots;
       if (std::any_of(stack_slots.begin(), stack_slots.end(),
-                      [&](uint32_t slot) { return slot == target_slot; })) {
+                      [&](int32_t slot) { return slot == target_slot; })) {
         FATAL("Existing move from stack slot %d to stack slot %d", stack_slot,
               target_slot);
       }
@@ -204,51 +224,99 @@ class ParallelMoveResolver {
   }
 #else
   void CheckNoExistingMoveToRegister(RegisterT target_reg) {}
-  void CheckNoExistingMoveToStackSlot(uint32_t target_slot) {}
+  void CheckNoExistingMoveToStackSlot(int32_t target_slot) {}
 #endif
 
   void RecordMoveToRegister(ValueNode* node,
                             compiler::InstructionOperand source,
-                            RegisterT target_reg) {
+                            RegisterT target_reg,
+                            bool target_needs_to_be_decompressed) {
     // There shouldn't have been another move to this register already.
     CheckNoExistingMoveToRegister(target_reg);
 
+    NeedsDecompression needs_decompression = kDoesNotNeedDecompression;
+    if constexpr (DecompressIfNeeded) {
+      if (target_needs_to_be_decompressed &&
+          !node->decompresses_tagged_result()) {
+        needs_decompression = kNeedsDecompression;
+      }
+    } else {
+      DCHECK_IMPLIES(target_needs_to_be_decompressed,
+                     node->decompresses_tagged_result());
+    }
+
+    GapMoveTargets* targets;
     if (source.IsAnyRegister()) {
       RegisterT source_reg = ToRegisterT<RegisterT>(source);
-      if (target_reg != source_reg) {
-        moves_from_register_[source_reg.code()].registers.set(target_reg);
+      if (target_reg == source_reg) {
+        // We should never have a register aliasing case that needs
+        // decompression, since this path is only used by exception phis and
+        // they have no reg->reg moves.
+        DCHECK_EQ(needs_decompression, kDoesNotNeedDecompression);
+        return;
       }
+      targets = &moves_from_register_[source_reg.code()];
     } else if (source.IsAnyStackSlot()) {
-      uint32_t source_slot = masm_->GetFramePointerOffsetForStackSlot(
+      int32_t source_slot = masm_->GetFramePointerOffsetForStackSlot(
           compiler::AllocatedOperand::cast(source));
-      moves_from_stack_slot_[source_slot].registers.set(target_reg);
+      targets = &moves_from_stack_slot_[source_slot];
     } else {
       DCHECK(source.IsConstant());
       DCHECK(IsConstantNode(node->opcode()));
       materializing_register_moves_[target_reg.code()] = node;
+      // No need to update `targets->needs_decompression`: materialization is
+      // always decompressed.
+      return;
+    }
+
+    targets->registers.set(target_reg);
+    if (needs_decompression == kNeedsDecompression) {
+      targets->needs_decompression = kNeedsDecompression;
     }
   }
 
   void RecordMoveToStackSlot(ValueNode* node,
                              compiler::InstructionOperand source,
-                             uint32_t target_slot) {
+                             int32_t target_slot,
+                             bool target_needs_to_be_decompressed) {
     // There shouldn't have been another move to this stack slot already.
     CheckNoExistingMoveToStackSlot(target_slot);
 
+    NeedsDecompression needs_decompression = kDoesNotNeedDecompression;
+    if constexpr (DecompressIfNeeded) {
+      if (target_needs_to_be_decompressed &&
+          !node->decompresses_tagged_result()) {
+        needs_decompression = kNeedsDecompression;
+      }
+    } else {
+      DCHECK_IMPLIES(target_needs_to_be_decompressed,
+                     node->decompresses_tagged_result());
+    }
+
+    GapMoveTargets* targets;
     if (source.IsAnyRegister()) {
       RegisterT source_reg = ToRegisterT<RegisterT>(source);
-      moves_from_register_[source_reg.code()].stack_slots.push_back(
-          target_slot);
+      targets = &moves_from_register_[source_reg.code()];
     } else if (source.IsAnyStackSlot()) {
-      uint32_t source_slot = masm_->GetFramePointerOffsetForStackSlot(
+      int32_t source_slot = masm_->GetFramePointerOffsetForStackSlot(
           compiler::AllocatedOperand::cast(source));
-      if (source_slot != target_slot) {
-        moves_from_stack_slot_[source_slot].stack_slots.push_back(target_slot);
+      if (source_slot == target_slot &&
+          needs_decompression == kDoesNotNeedDecompression) {
+        return;
       }
+      targets = &moves_from_stack_slot_[source_slot];
     } else {
       DCHECK(source.IsConstant());
       DCHECK(IsConstantNode(node->opcode()));
       materializing_stack_slot_moves_.emplace_back(target_slot, node);
+      // No need to update `targets->needs_decompression`: materialization is
+      // always decompressed.
+      return;
+    }
+
+    targets->stack_slots.push_back(target_slot);
+    if (needs_decompression == kNeedsDecompression) {
+      targets->needs_decompression = kNeedsDecompression;
     }
   }
 
@@ -258,7 +326,7 @@ class ParallelMoveResolver {
     return std::exchange(moves_from_register_[source_reg.code()],
                          GapMoveTargets{});
   }
-  GapMoveTargets PopTargets(uint32_t source_slot) {
+  GapMoveTargets PopTargets(int32_t source_slot) {
     auto handle = moves_from_stack_slot_.extract(source_slot);
     if (handle.empty()) return {};
     DCHECK(!handle.mapped().is_empty());
@@ -286,10 +354,10 @@ class ParallelMoveResolver {
     // chain start.
     if (has_cycle) {
       if (!scratch_has_cycle_start_) {
-        Pop(kScratchRegT);
+        Pop(scratch_);
         scratch_has_cycle_start_ = true;
       }
-      EmitMovesFromSource(kScratchRegT, std::move(targets));
+      EmitMovesFromSource(scratch_, std::move(targets));
       scratch_has_cycle_start_ = false;
       __ RecordComment("--   * End of cycle");
     } else {
@@ -306,10 +374,10 @@ class ParallelMoveResolver {
       if (chain_start == source) {
         __ RecordComment("--   * Cycle");
         DCHECK(!scratch_has_cycle_start_);
-        if constexpr (std::is_same_v<ChainStartT, uint32_t>) {
-          EmitStackMove(kScratchRegT, chain_start);
+        if constexpr (std::is_same_v<ChainStartT, int32_t>) {
+          __ Move(scratch_, StackSlot{chain_start});
         } else {
-          __ Move(kScratchRegT, chain_start);
+          __ Move(scratch_, chain_start);
         }
         scratch_has_cycle_start_ = true;
         return true;
@@ -338,7 +406,7 @@ class ParallelMoveResolver {
     for (auto target : targets.registers) {
       has_cycle |= ContinueEmitMoveChain(chain_start, target);
     }
-    for (uint32_t target_slot : targets.stack_slots) {
+    for (int32_t target_slot : targets.stack_slots) {
       has_cycle |= ContinueEmitMoveChain(chain_start, target_slot);
     }
     return has_cycle;
@@ -346,18 +414,23 @@ class ParallelMoveResolver {
 
   void EmitMovesFromSource(RegisterT source_reg, GapMoveTargets&& targets) {
     DCHECK(moves_from_register_[source_reg.code()].is_empty());
+    if constexpr (DecompressIfNeeded) {
+      if (targets.needs_decompression == kNeedsDecompression) {
+        __ DecompressTagged(source_reg, source_reg);
+      }
+    }
     for (RegisterT target_reg : targets.registers) {
       DCHECK(moves_from_register_[target_reg.code()].is_empty());
       __ Move(target_reg, source_reg);
     }
-    for (uint32_t target_slot : targets.stack_slots) {
+    for (int32_t target_slot : targets.stack_slots) {
       DCHECK_EQ(moves_from_stack_slot_.find(target_slot),
                 moves_from_stack_slot_.end());
-      EmitStackMove(target_slot, source_reg);
+      __ Move(StackSlot{target_slot}, source_reg);
     }
   }
 
-  void EmitMovesFromSource(uint32_t source_slot, GapMoveTargets&& targets) {
+  void EmitMovesFromSource(int32_t source_slot, GapMoveTargets&& targets) {
     DCHECK_EQ(moves_from_stack_slot_.find(source_slot),
               moves_from_stack_slot_.end());
 
@@ -372,51 +445,35 @@ class ParallelMoveResolver {
       // Otherwise, cache the slot value on the scratch register, clobbering it
       // if necessary.
       if (scratch_has_cycle_start_) {
-        Push(kScratchRegT);
+        Push(scratch_);
         scratch_has_cycle_start_ = false;
       }
-      register_with_slot_value = kScratchRegT;
+      register_with_slot_value = scratch_;
     }
-
     // Now emit moves from that cached register instead of from the stack slot.
     DCHECK(register_with_slot_value.is_valid());
     DCHECK(moves_from_register_[register_with_slot_value.code()].is_empty());
-    EmitStackMove(register_with_slot_value, source_slot);
+    __ Move(register_with_slot_value, StackSlot{source_slot});
+    // Decompress after the first move; subsequent moves reuse this register so
+    // they're guaranteed to be decompressed.
+    if constexpr (DecompressIfNeeded) {
+      if (targets.needs_decompression == kNeedsDecompression) {
+        __ DecompressTagged(register_with_slot_value, register_with_slot_value);
+        targets.needs_decompression = kDoesNotNeedDecompression;
+      }
+    }
     EmitMovesFromSource(register_with_slot_value, std::move(targets));
   }
 
-  // The slot index used for representing slots in the move graph is the offset
-  // from the frame pointer. These helpers help translate this into an actual
-  // machine move.
-  void EmitStackMove(uint32_t target_slot, Register source_reg) {
-    __ movq(MemOperand(rbp, target_slot), source_reg);
-  }
-  void EmitStackMove(uint32_t target_slot, DoubleRegister source_reg) {
-    __ Movsd(MemOperand(rbp, target_slot), source_reg);
-  }
-  void EmitStackMove(Register target_reg, uint32_t source_slot) {
-    __ movq(target_reg, MemOperand(rbp, source_slot));
-  }
-  void EmitStackMove(DoubleRegister target_reg, uint32_t source_slot) {
-    __ Movsd(target_reg, MemOperand(rbp, source_slot));
-  }
-
   void Push(Register reg) { __ Push(reg); }
   void Push(DoubleRegister reg) { __ PushAll({reg}); }
-  void Push(uint32_t stack_slot) {
-    __ movq(kScratchRegister, MemOperand(rbp, stack_slot));
-    __ movq(MemOperand(rsp, -1), kScratchRegister);
-  }
   void Pop(Register reg) { __ Pop(reg); }
   void Pop(DoubleRegister reg) { __ PopAll({reg}); }
-  void Pop(uint32_t stack_slot) {
-    __ movq(kScratchRegister, MemOperand(rsp, -1));
-    __ movq(MemOperand(rbp, stack_slot), kScratchRegister);
-  }
 
-  MacroAssembler* masm() const { return masm_; }
+  MaglevAssembler* masm() const { return masm_; }
 
   MaglevAssembler* const masm_;
+  RegisterT scratch_;
 
   // Keep moves to/from registers and stack slots separate -- there are a fixed
   // number of registers but an infinite number of stack slots, so the register
@@ -427,15 +484,16 @@ class ParallelMoveResolver {
   std::array<GapMoveTargets, RegisterT::kNumRegisters> moves_from_register_ =
       {};
 
+  // TODO(victorgomes): Use MaglevAssembler::StackSlot instead of int32_t.
   // moves_from_stack_slot_[source] = target.
-  std::unordered_map<uint32_t, GapMoveTargets> moves_from_stack_slot_;
+  std::unordered_map<int32_t, GapMoveTargets> moves_from_stack_slot_;
 
   // materializing_register_moves[target] = node.
   std::array<ValueNode*, RegisterT::kNumRegisters>
       materializing_register_moves_ = {};
 
   // materializing_stack_slot_moves = {(node,target), ... }.
-  std::vector<std::pair<uint32_t, ValueNode*>> materializing_stack_slot_moves_;
+  std::vector<std::pair<int32_t, ValueNode*>> materializing_stack_slot_moves_;
 
   bool scratch_has_cycle_start_ = false;
 };
@@ -486,35 +544,46 @@ class ExceptionHandlerTrampolineBuilder {
     // values are tagged and b) the stack walk treats unknown stack slots as
     // tagged.
 
-    const InterpretedDeoptFrame& lazy_frame =
-        deopt_info->top_frame().type() ==
-                DeoptFrame::FrameType::kBuiltinContinuationFrame
-            ? deopt_info->top_frame().parent()->as_interpreted()
-            : deopt_info->top_frame().as_interpreted();
+    // TODO(victorgomes): Update this once we support exceptions in inlined
+    // functions. Currently, only the bottom frame can contain a catch block.
+    const DeoptFrame* bottom_frame = &deopt_info->top_frame();
+    while (bottom_frame->parent() != nullptr) {
+      bottom_frame = bottom_frame->parent();
+    }
+    const InterpretedDeoptFrame& lazy_frame = bottom_frame->as_interpreted();
 
     // TODO(v8:7700): Handle inlining.
-
-    ParallelMoveResolver<Register> direct_moves(masm_);
+    ParallelMoveResolver<Register, true> direct_moves(masm_);
     MoveVector materialising_moves;
     bool save_accumulator = false;
     RecordMoves(lazy_frame.unit(), catch_block, lazy_frame.frame_state(),
                 &direct_moves, &materialising_moves, &save_accumulator);
-
-    __ bind(&handler_info->trampoline_entry);
+    __ BindJumpTarget(&handler_info->trampoline_entry);
     __ RecordComment("-- Exception handler trampoline START");
     EmitMaterialisationsAndPushResults(materialising_moves, save_accumulator);
+
     __ RecordComment("EmitMoves");
-    direct_moves.EmitMoves();
-    EmitPopMaterialisedResults(materialising_moves, save_accumulator);
-    __ jmp(catch_block->label());
+// TODO(victorgomes): Add a scratch register scope to MaglevAssembler and
+// remove this arch-dependent code.
+#ifdef V8_TARGET_ARCH_ARM64
+    UseScratchRegisterScope temps(masm_);
+    Register scratch = temps.AcquireX();
+#elif V8_TARGET_ARCH_X64
+    Register scratch = kScratchRegister;
+#else
+#error "Maglev does not supported this architecture."
+#endif
+    direct_moves.EmitMoves(scratch);
+    EmitPopMaterialisedResults(materialising_moves, save_accumulator, scratch);
+    __ Jump(catch_block->label());
     __ RecordComment("-- Exception handler trampoline END");
   }
 
-  MacroAssembler* masm() const { return masm_; }
+  MaglevAssembler* masm() const { return masm_; }
 
   void RecordMoves(const MaglevCompilationUnit& unit, BasicBlock* catch_block,
                    const CompactInterpreterFrameState* register_frame,
-                   ParallelMoveResolver<Register>* direct_moves,
+                   ParallelMoveResolver<Register, true>* direct_moves,
                    MoveVector* materialising_moves, bool* save_accumulator) {
     for (Phi* phi : *catch_block->phis()) {
       DCHECK(phi->is_exception_phi());
@@ -540,22 +609,18 @@ class ExceptionHandlerTrampolineBuilder {
       DCHECK(!source->allocation().IsRegister());
 
       switch (source->properties().value_representation()) {
+        case ValueRepresentation::kWord64:
+          UNREACHABLE();
         case ValueRepresentation::kTagged:
           direct_moves->RecordMove(
               source, source->allocation(),
-              compiler::AllocatedOperand::cast(target.operand()));
+              compiler::AllocatedOperand::cast(target.operand()),
+              phi->decompresses_tagged_result() ? kNeedsDecompression
+                                                : kDoesNotNeedDecompression);
           break;
         case ValueRepresentation::kInt32:
-          if (source->allocation().IsConstant()) {
-            // TODO(jgruber): Why is it okay for Int32 constants to remain
-            // untagged while non-constants are unconditionally smi-tagged or
-            // converted to a HeapNumber during materialisation?
-            direct_moves->RecordMove(
-                source, source->allocation(),
-                compiler::AllocatedOperand::cast(target.operand()));
-          } else {
-            materialising_moves->emplace_back(target, source);
-          }
+        case ValueRepresentation::kUint32:
+          materialising_moves->emplace_back(target, source);
           break;
         case ValueRepresentation::kFloat64:
           materialising_moves->emplace_back(target, source);
@@ -582,73 +647,37 @@ class ExceptionHandlerTrampolineBuilder {
     // talking about a presumably infrequent case for exception handlers.
 
     __ RecordComment("EmitMaterialisationsAndPushResults");
+
     if (save_accumulator) __ Push(kReturnRegister0);
     for (const Move& move : moves) {
-      MaterialiseTo(move.source, kReturnRegister0);
+      // We consider constants after all other operations, since constants
+      // don't need to call NewHeapNumber.
+      if (IsConstantNode(move.source->opcode())) continue;
+      __ MaterialiseValueNode(kReturnRegister0, move.source);
       __ Push(kReturnRegister0);
     }
   }
 
   void EmitPopMaterialisedResults(const MoveVector& moves,
-                                  bool save_accumulator) const {
+                                  bool save_accumulator,
+                                  Register scratch) const {
     if (moves.size() == 0) return;
     __ RecordComment("EmitPopMaterialisedResults");
-    for (auto it = moves.rbegin(); it < moves.rend(); it++) {
-      const ValueLocation& target = it->target;
-      if (target.operand().IsRegister()) {
-        __ Pop(target.AssignedGeneralRegister());
+    for (const Move& move : base::Reversed(moves)) {
+      const ValueLocation& target = move.target;
+      Register target_reg = target.operand().IsAnyRegister()
+                                ? target.AssignedGeneralRegister()
+                                : scratch;
+      if (IsConstantNode(move.source->opcode())) {
+        __ MaterialiseValueNode(target_reg, move.source);
       } else {
-        DCHECK(target.operand().IsStackSlot());
-        __ Pop(kScratchRegister);
-        __ movq(masm_->ToMemOperand(target.operand()), kScratchRegister);
+        __ Pop(target_reg);
       }
-    }
-
-    if (save_accumulator) __ Pop(kReturnRegister0);
-  }
-
-  void MaterialiseTo(ValueNode* value, Register dst) const {
-    using D = NewHeapNumberDescriptor;
-    switch (value->properties().value_representation()) {
-      case ValueRepresentation::kInt32: {
-        // We consider Int32Constants together with tagged values.
-        DCHECK(!value->allocation().IsConstant());
-        Label done;
-        __ movq(dst, ToMemOperand(value));
-        __ addl(dst, dst);
-        __ j(no_overflow, &done);
-        // If we overflow, instead of bailing out (deopting), we change
-        // representation to a HeapNumber.
-        __ Cvtlsi2sd(D::GetDoubleRegisterParameter(D::kValue),
-                     ToMemOperand(value));
-        __ CallBuiltin(Builtin::kNewHeapNumber);
-        __ Move(dst, kReturnRegister0);
-        __ bind(&done);
-        break;
+      if (target_reg == scratch) {
+        __ Move(masm_->ToMemOperand(target.operand()), scratch);
       }
-      case ValueRepresentation::kFloat64:
-        if (Float64Constant* constant = value->TryCast<Float64Constant>()) {
-          __ Move(D::GetDoubleRegisterParameter(D::kValue), constant->value());
-        } else {
-          __ Movsd(D::GetDoubleRegisterParameter(D::kValue),
-                   ToMemOperand(value));
-        }
-        __ CallBuiltin(Builtin::kNewHeapNumber);
-        __ Move(dst, kReturnRegister0);
-        break;
-      case ValueRepresentation::kTagged:
-        UNREACHABLE();
     }
-  }
-
-  MemOperand ToMemOperand(ValueNode* node) const {
-    DCHECK(node->allocation().IsAnyStackSlot());
-    return masm_->ToMemOperand(node->allocation());
-  }
-
-  MemOperand ToMemOperand(const ValueLocation& location) const {
-    DCHECK(location.operand().IsStackSlot());
-    return masm_->ToMemOperand(location.operand());
+    if (save_accumulator) __ Pop(kReturnRegister0);
   }
 
   MaglevAssembler* const masm_;
@@ -660,155 +689,33 @@ class MaglevCodeGeneratingNodeProcessor {
       : masm_(masm) {}
 
   void PreProcessGraph(Graph* graph) {
+    // TODO(victorgomes): I wonder if we want to create a struct that shares
+    // these fields between graph and code_gen_state.
     code_gen_state()->set_untagged_slots(graph->untagged_stack_slots());
     code_gen_state()->set_tagged_slots(graph->tagged_stack_slots());
+    code_gen_state()->set_max_deopted_stack_size(
+        graph->max_deopted_stack_size());
+    code_gen_state()->set_max_call_stack_args_(graph->max_call_stack_args());
 
     if (v8_flags.maglev_break_on_entry) {
-      __ int3();
+      __ DebugBreak();
     }
 
-    if (v8_flags.maglev_ool_prologue) {
-      // Call the out-of-line prologue (with parameters passed on the stack).
-      __ Push(Immediate(code_gen_state()->stack_slots() * kSystemPointerSize));
-      __ Push(Immediate(code_gen_state()->tagged_slots() * kSystemPointerSize));
-      __ CallBuiltin(Builtin::kMaglevOutOfLinePrologue);
-    } else {
-      __ BailoutIfDeoptimized(rbx);
-
-      // Tiering support.
-      // TODO(jgruber): Extract to a builtin (the tiering prologue is ~230 bytes
-      // per Maglev code object on x64).
-      {
-        // Scratch registers. Don't clobber regs related to the calling
-        // convention (e.g. kJavaScriptCallArgCountRegister). Keep up-to-date
-        // with deferred flags code.
-        Register flags = rcx;
-        Register feedback_vector = r9;
-
-        // Load the feedback vector.
-        __ LoadTaggedPointerField(
-            feedback_vector,
-            FieldOperand(kJSFunctionRegister, JSFunction::kFeedbackCellOffset));
-        __ LoadTaggedPointerField(
-            feedback_vector, FieldOperand(feedback_vector, Cell::kValueOffset));
-        __ AssertFeedbackVector(feedback_vector);
-
-        __ LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
-            flags, feedback_vector, CodeKind::MAGLEV,
-            &deferred_flags_need_processing_);
-      }
-
-      __ EnterFrame(StackFrame::MAGLEV);
-
-      // Save arguments in frame.
-      // TODO(leszeks): Consider eliding this frame if we don't make any calls
-      // that could clobber these registers.
-      __ Push(kContextRegister);
-      __ Push(kJSFunctionRegister);              // Callee's JS function.
-      __ Push(kJavaScriptCallArgCountRegister);  // Actual argument count.
-
-      {
-        ASM_CODE_COMMENT_STRING(masm(), " Stack/interrupt check");
-        // Stack check. This folds the checks for both the interrupt stack limit
-        // check and the real stack limit into one by just checking for the
-        // interrupt limit. The interrupt limit is either equal to the real
-        // stack limit or tighter. By ensuring we have space until that limit
-        // after building the frame we can quickly precheck both at once.
-        __ Move(kScratchRegister, rsp);
-        // TODO(leszeks): Include a max call argument size here.
-        __ subq(kScratchRegister, Immediate(code_gen_state()->stack_slots() *
-                                            kSystemPointerSize));
-        __ cmpq(kScratchRegister,
-                __ StackLimitAsOperand(StackLimitKind::kInterruptStackLimit));
-
-        __ j(below, &deferred_call_stack_guard_);
-        __ bind(&deferred_call_stack_guard_return_);
-      }
-
-      // Initialize stack slots.
-      if (graph->tagged_stack_slots() > 0) {
-        ASM_CODE_COMMENT_STRING(masm(), "Initializing stack slots");
-        // TODO(leszeks): Consider filling with xmm + movdqa instead.
-        __ Move(rax, Immediate(0));
-
-        // Magic value. Experimentally, an unroll size of 8 doesn't seem any
-        // worse than fully unrolled pushes.
-        const int kLoopUnrollSize = 8;
-        int tagged_slots = graph->tagged_stack_slots();
-        if (tagged_slots < 2 * kLoopUnrollSize) {
-          // If the frame is small enough, just unroll the frame fill
-          // completely.
-          for (int i = 0; i < tagged_slots; ++i) {
-            __ pushq(rax);
-          }
-        } else {
-          // Extract the first few slots to round to the unroll size.
-          int first_slots = tagged_slots % kLoopUnrollSize;
-          for (int i = 0; i < first_slots; ++i) {
-            __ pushq(rax);
-          }
-          __ Move(rbx, Immediate(tagged_slots / kLoopUnrollSize));
-          // We enter the loop unconditionally, so make sure we need to loop at
-          // least once.
-          DCHECK_GT(tagged_slots / kLoopUnrollSize, 0);
-          Label loop;
-          __ bind(&loop);
-          for (int i = 0; i < kLoopUnrollSize; ++i) {
-            __ pushq(rax);
-          }
-          __ decl(rbx);
-          __ j(greater, &loop);
-        }
-      }
-      if (graph->untagged_stack_slots() > 0) {
-        // Extend rsp by the size of the remaining untagged part of the frame,
-        // no need to initialise these.
-        __ subq(rsp,
-                Immediate(graph->untagged_stack_slots() * kSystemPointerSize));
-      }
-    }
+    __ Prologue(graph);
   }
 
-  void PostProcessGraph(Graph*) {
-    __ int3();
-
-    if (!v8_flags.maglev_ool_prologue) {
-      __ bind(&deferred_call_stack_guard_);
-      {
-        ASM_CODE_COMMENT_STRING(masm(), "Stack/interrupt call");
-        // Save any registers that can be referenced by RegisterInput.
-        // TODO(leszeks): Only push those that are used by the graph.
-        __ PushAll(RegisterInput::kAllowedRegisters);
-        // Push the frame size
-        __ Push(Immediate(Smi::FromInt(code_gen_state()->stack_slots() *
-                                       kSystemPointerSize)));
-        __ CallRuntime(Runtime::kStackGuardWithGap, 1);
-        __ PopAll(RegisterInput::kAllowedRegisters);
-        __ jmp(&deferred_call_stack_guard_return_);
-      }
-
-      __ bind(&deferred_flags_need_processing_);
-      {
-        ASM_CODE_COMMENT_STRING(masm(), "Optimized marker check");
-        // See PreProcessGraph.
-        Register flags = rcx;
-        Register feedback_vector = r9;
-        // TODO(leszeks): This could definitely be a builtin that we tail-call.
-        __ OptimizeCodeOrTailCallOptimizedCodeSlot(
-            flags, feedback_vector, kJSFunctionRegister, JumpMode::kJump);
-        __ Trap();
-      }
-    }
-  }
+  void PostProcessGraph(Graph* graph) {}
 
   void PreProcessBasicBlock(BasicBlock* block) {
+    if (block->is_loop()) {
+      __ LoopHeaderAlign();
+    }
     if (v8_flags.code_comments) {
       std::stringstream ss;
       ss << "-- Block b" << graph_labeller()->BlockId(block);
       __ RecordComment(ss.str());
     }
-
-    __ bind(block->label());
+    __ BindBlock(block);
   }
 
   template <typename NodeT>
@@ -820,13 +727,8 @@ class MaglevCodeGeneratingNodeProcessor {
       __ RecordComment(ss.str());
     }
 
-    if (v8_flags.debug_code) {
-      __ movq(kScratchRegister, rbp);
-      __ subq(kScratchRegister, rsp);
-      __ cmpq(kScratchRegister,
-              Immediate(code_gen_state()->stack_slots() * kSystemPointerSize +
-                        StandardFrameConstants::kFixedFrameSizeFromFp));
-      __ Assert(equal, AbortReason::kStackAccessBelowStackPointer);
+    if (v8_flags.maglev_assert_stack_size) {
+      __ AssertStackSizeCorrect();
     }
 
     // Emit Phi moves before visiting the control node.
@@ -835,22 +737,47 @@ class MaglevCodeGeneratingNodeProcessor {
                            state);
     }
 
+    if (v8_flags.debug_code && !std::is_same_v<NodeT, Phi>) {
+      // Check that all int32/uint32 inputs are zero extended.
+      // Note that we don't do this for Phis, since they are virtual operations
+      // whose inputs aren't actual inputs but are injected on incoming
+      // branches. There's thus nothing to verify for the inputs we see for the
+      // phi.
+      for (Input& input : *node) {
+        ValueRepresentation rep =
+            input.node()->properties().value_representation();
+        if (rep == ValueRepresentation::kInt32 ||
+            rep == ValueRepresentation::kUint32) {
+          // TODO(leszeks): Ideally we'd check non-register inputs too, but
+          // AssertZeroExtended needs the scratch register, so we'd have to do
+          // some manual push/pop here to free up another register.
+          if (input.IsGeneralRegister()) {
+            __ AssertZeroExtended(ToRegister(input));
+          }
+        }
+      }
+    }
+
+    MaglevAssembler::ScratchRegisterScope scratch_scope(masm());
+    scratch_scope.Include(node->general_temporaries());
+    scratch_scope.IncludeDouble(node->double_temporaries());
+
     node->GenerateCode(masm(), state);
 
     if (std::is_base_of<ValueNode, NodeT>::value) {
       ValueNode* value_node = node->template Cast<ValueNode>();
-      if (value_node->is_spilled()) {
+      if (value_node->has_valid_live_range() && value_node->is_spilled()) {
         compiler::AllocatedOperand source =
             compiler::AllocatedOperand::cast(value_node->result().operand());
         // We shouldn't spill nodes which already output to the stack.
         if (!source.IsAnyStackSlot()) {
           if (v8_flags.code_comments) __ RecordComment("--   Spill:");
           if (source.IsRegister()) {
-            __ movq(masm()->GetStackSlot(value_node->spill_slot()),
+            __ Move(masm()->GetStackSlot(value_node->spill_slot()),
                     ToRegister(source));
           } else {
-            __ Movsd(masm()->GetStackSlot(value_node->spill_slot()),
-                     ToDoubleRegister(source));
+            __ Move(masm()->GetStackSlot(value_node->spill_slot()),
+                    ToDoubleRegister(source));
           }
         } else {
           // Otherwise, the result source stack slot should be equal to the
@@ -871,14 +798,28 @@ class MaglevCodeGeneratingNodeProcessor {
 
     int predecessor_id = state.block()->predecessor_id();
 
+// TODO(victorgomes): Add a scratch register scope to MaglevAssembler and
+// remove this arch-dependent code.
+#ifdef V8_TARGET_ARCH_ARM64
+    UseScratchRegisterScope temps(masm_);
+    Register scratch = temps.AcquireX();
+    DoubleRegister double_scratch = temps.AcquireD();
+#elif V8_TARGET_ARCH_X64
+    Register scratch = kScratchRegister;
+    DoubleRegister double_scratch = kScratchDoubleReg;
+#else
+#error "Maglev does not supported this architecture."
+#endif
+
     // TODO(leszeks): Move these to fields, to allow their data structure
     // allocations to be reused. Will need some sort of state resetting.
-    ParallelMoveResolver<Register> register_moves(masm_);
-    ParallelMoveResolver<DoubleRegister> double_register_moves(masm_);
+    ParallelMoveResolver<Register, false> register_moves(masm_);
+    ParallelMoveResolver<DoubleRegister, false> double_register_moves(masm_);
 
     // Remember what registers were assigned to by a Phi, to avoid clobbering
     // them with RegisterMoves.
     RegList registers_set_by_phis;
+    DoubleRegList double_registers_set_by_phis;
 
     __ RecordComment("--   Gap moves:");
 
@@ -910,9 +851,19 @@ class MaglevCodeGeneratingNodeProcessor {
              << graph_labeller()->NodeId(phi) << ")";
           __ RecordComment(ss.str());
         }
-        register_moves.RecordMove(node, source, target);
+        if (phi->value_representation() == ValueRepresentation::kFloat64) {
+          DCHECK(!phi->decompresses_tagged_result());
+          double_register_moves.RecordMove(node, source, target, false);
+        } else {
+          register_moves.RecordMove(node, source, target,
+                                    kDoesNotNeedDecompression);
+        }
         if (target.IsAnyRegister()) {
-          registers_set_by_phis.set(target.GetRegister());
+          if (phi->value_representation() == ValueRepresentation::kFloat64) {
+            double_registers_set_by_phis.set(target.GetDoubleRegister());
+          } else {
+            registers_set_by_phis.set(target.GetRegister());
+          }
         }
       }
     }
@@ -932,16 +883,20 @@ class MaglevCodeGeneratingNodeProcessor {
               ss << "--   * " << source << " → " << reg;
               __ RecordComment(ss.str());
             }
-            register_moves.RecordMove(node, source, reg);
+            register_moves.RecordMove(node, source, reg,
+                                      kDoesNotNeedDecompression);
           }
         });
 
-    register_moves.EmitMoves();
+    register_moves.EmitMoves(scratch);
 
     __ RecordComment("--   Double gap moves:");
 
     target->state()->register_state().ForEachDoubleRegister(
         [&](DoubleRegister reg, RegisterState& state) {
+          // Don't clobber registers set by a Phi.
+          if (double_registers_set_by_phis.has(reg)) return;
+
           ValueNode* node;
           RegisterMerge* merge;
           if (LoadMergeState(state, &node, &merge)) {
@@ -952,11 +907,12 @@ class MaglevCodeGeneratingNodeProcessor {
               ss << "--   * " << source << " → " << reg;
               __ RecordComment(ss.str());
             }
-            double_register_moves.RecordMove(node, source, reg);
+            double_register_moves.RecordMove(node, source, reg,
+                                             kDoesNotNeedDecompression);
           }
         });
 
-    double_register_moves.EmitMoves();
+    double_register_moves.EmitMoves(double_scratch);
   }
 
   Isolate* isolate() const { return masm_->isolate(); }
@@ -970,9 +926,6 @@ class MaglevCodeGeneratingNodeProcessor {
 
  private:
   MaglevAssembler* const masm_;
-  Label deferred_call_stack_guard_;
-  Label deferred_call_stack_guard_return_;
-  Label deferred_flags_need_processing_;
 };
 
 class SafepointingNodeProcessor {
@@ -992,18 +945,31 @@ class SafepointingNodeProcessor {
 };
 
 namespace {
-int GetFrameCount(const DeoptFrame& deopt_frame) {
-  switch (deopt_frame.type()) {
-    case DeoptFrame::FrameType::kInterpretedFrame:
-      return 1 + deopt_frame.as_interpreted().unit().inlining_depth();
-    case DeoptFrame::FrameType::kBuiltinContinuationFrame:
-      return 1 + GetFrameCount(*deopt_frame.parent());
+struct FrameCount {
+  int total;
+  int js_frame;
+};
+
+FrameCount GetFrameCount(const DeoptFrame* deopt_frame) {
+  int total = 1;
+  int js_frame = 1;
+  while (deopt_frame->parent()) {
+    deopt_frame = deopt_frame->parent();
+    if (deopt_frame->type() != DeoptFrame::FrameType::kInlinedArgumentsFrame) {
+      js_frame++;
+    }
+    total++;
   }
+  return FrameCount{total, js_frame};
 }
+
 BytecodeOffset GetBytecodeOffset(const DeoptFrame& deopt_frame) {
   switch (deopt_frame.type()) {
     case DeoptFrame::FrameType::kInterpretedFrame:
       return deopt_frame.as_interpreted().bytecode_position();
+    case DeoptFrame::FrameType::kInlinedArgumentsFrame:
+      DCHECK_NOT_NULL(deopt_frame.parent());
+      return GetBytecodeOffset(*deopt_frame.parent());
     case DeoptFrame::FrameType::kBuiltinContinuationFrame:
       return Builtins::GetContinuationBytecodeOffset(
           deopt_frame.as_builtin_continuation().builtin_id());
@@ -1013,10 +979,24 @@ SourcePosition GetSourcePosition(const DeoptFrame& deopt_frame) {
   switch (deopt_frame.type()) {
     case DeoptFrame::FrameType::kInterpretedFrame:
       return deopt_frame.as_interpreted().source_position();
+    case DeoptFrame::FrameType::kInlinedArgumentsFrame:
+      DCHECK_NOT_NULL(deopt_frame.parent());
+      return GetSourcePosition(*deopt_frame.parent());
     case DeoptFrame::FrameType::kBuiltinContinuationFrame:
       return SourcePosition::Unknown();
   }
 }
+compiler::SharedFunctionInfoRef GetSharedFunctionInfo(
+    const DeoptFrame& deopt_frame) {
+  switch (deopt_frame.type()) {
+    case DeoptFrame::FrameType::kInterpretedFrame:
+      return deopt_frame.as_interpreted().unit().shared_function_info();
+    case DeoptFrame::FrameType::kInlinedArgumentsFrame:
+      return deopt_frame.as_inlined_arguments().unit().shared_function_info();
+    case DeoptFrame::FrameType::kBuiltinContinuationFrame:
+      return GetSharedFunctionInfo(*deopt_frame.parent());
+  }
+}
 }  // namespace
 
 class MaglevTranslationArrayBuilder {
@@ -1031,24 +1011,32 @@ class MaglevTranslationArrayBuilder {
         deopt_literals_(deopt_literals) {}
 
   void BuildEagerDeopt(EagerDeoptInfo* deopt_info) {
-    int frame_count = GetFrameCount(deopt_info->top_frame());
-    int jsframe_count = frame_count;
-    int update_feedback_count = 0;
+    auto [frame_count, jsframe_count] = GetFrameCount(&deopt_info->top_frame());
     deopt_info->set_translation_index(
-        translation_array_builder_->BeginTranslation(frame_count, jsframe_count,
-                                                     update_feedback_count));
+        translation_array_builder_->BeginTranslation(
+            frame_count, jsframe_count,
+            deopt_info->feedback_to_update().IsValid()));
+    if (deopt_info->feedback_to_update().IsValid()) {
+      translation_array_builder_->AddUpdateFeedback(
+          GetDeoptLiteral(*deopt_info->feedback_to_update().vector),
+          deopt_info->feedback_to_update().index());
+    }
 
     const InputLocation* current_input_location = deopt_info->input_locations();
     BuildDeoptFrame(deopt_info->top_frame(), current_input_location);
   }
 
   void BuildLazyDeopt(LazyDeoptInfo* deopt_info) {
-    int frame_count = GetFrameCount(deopt_info->top_frame());
-    int jsframe_count = frame_count;
-    int update_feedback_count = 0;
+    auto [frame_count, jsframe_count] = GetFrameCount(&deopt_info->top_frame());
     deopt_info->set_translation_index(
-        translation_array_builder_->BeginTranslation(frame_count, jsframe_count,
-                                                     update_feedback_count));
+        translation_array_builder_->BeginTranslation(
+            frame_count, jsframe_count,
+            deopt_info->feedback_to_update().IsValid()));
+    if (deopt_info->feedback_to_update().IsValid()) {
+      translation_array_builder_->AddUpdateFeedback(
+          GetDeoptLiteral(*deopt_info->feedback_to_update().vector),
+          deopt_info->feedback_to_update().index());
+    }
 
     const InputLocation* current_input_location = deopt_info->input_locations();
 
@@ -1090,8 +1078,7 @@ class MaglevTranslationArrayBuilder {
         }
         translation_array_builder_->BeginInterpretedFrame(
             interpreted_frame.bytecode_position(),
-            GetDeoptLiteral(
-                *interpreted_frame.unit().shared_function_info().object()),
+            GetDeoptLiteral(GetSharedFunctionInfo(interpreted_frame)),
             interpreted_frame.unit().register_count(), return_offset,
             deopt_info->result_size());
 
@@ -1101,6 +1088,9 @@ class MaglevTranslationArrayBuilder {
             deopt_info->result_size());
         break;
       }
+      case DeoptFrame::FrameType::kInlinedArgumentsFrame:
+        // The inlined arguments frame can never be the top frame.
+        UNREACHABLE();
       case DeoptFrame::FrameType::kBuiltinContinuationFrame: {
         const BuiltinContinuationDeoptFrame& builtin_continuation_frame =
             top_frame.as_builtin_continuation();
@@ -1108,11 +1098,7 @@ class MaglevTranslationArrayBuilder {
         translation_array_builder_->BeginBuiltinContinuationFrame(
             Builtins::GetContinuationBytecodeOffset(
                 builtin_continuation_frame.builtin_id()),
-            GetDeoptLiteral(*builtin_continuation_frame.parent()
-                                 ->as_interpreted()
-                                 .unit()
-                                 .shared_function_info()
-                                 .object()),
+            GetDeoptLiteral(GetSharedFunctionInfo(builtin_continuation_frame)),
             builtin_continuation_frame.parameters().length());
 
         // Closure
@@ -1168,8 +1154,7 @@ class MaglevTranslationArrayBuilder {
         const int return_count = 0;
         translation_array_builder_->BeginInterpretedFrame(
             interpreted_frame.bytecode_position(),
-            GetDeoptLiteral(
-                *interpreted_frame.unit().shared_function_info().object()),
+            GetDeoptLiteral(GetSharedFunctionInfo(interpreted_frame)),
             interpreted_frame.unit().register_count(), return_offset,
             return_count);
 
@@ -1179,6 +1164,28 @@ class MaglevTranslationArrayBuilder {
             return_count);
         break;
       }
+      case DeoptFrame::FrameType::kInlinedArgumentsFrame: {
+        const InlinedArgumentsDeoptFrame& inlined_arguments_frame =
+            frame.as_inlined_arguments();
+
+        translation_array_builder_->BeginInlinedExtraArguments(
+            GetDeoptLiteral(GetSharedFunctionInfo(inlined_arguments_frame)),
+            static_cast<uint32_t>(inlined_arguments_frame.arguments().size()));
+
+        // Closure
+        translation_array_builder_->StoreLiteral(
+            GetDeoptLiteral(inlined_arguments_frame.unit().function()));
+
+        // Arguments
+        // TODO(victorgomes): Technically we don't need all arguments, only the
+        // extra ones. But doing this at the moment, since it matches the
+        // TurboFan behaviour.
+        for (ValueNode* value : inlined_arguments_frame.arguments()) {
+          BuildDeoptFrameSingleValue(value, *current_input_location);
+          current_input_location++;
+        }
+        break;
+      }
       case DeoptFrame::FrameType::kBuiltinContinuationFrame: {
         const BuiltinContinuationDeoptFrame& builtin_continuation_frame =
             frame.as_builtin_continuation();
@@ -1186,11 +1193,7 @@ class MaglevTranslationArrayBuilder {
         translation_array_builder_->BeginBuiltinContinuationFrame(
             Builtins::GetContinuationBytecodeOffset(
                 builtin_continuation_frame.builtin_id()),
-            GetDeoptLiteral(*builtin_continuation_frame.parent()
-                                 ->as_interpreted()
-                                 .unit()
-                                 .shared_function_info()
-                                 .object()),
+            GetDeoptLiteral(GetSharedFunctionInfo(builtin_continuation_frame)),
             builtin_continuation_frame.parameters().length());
 
         // Closure
@@ -1215,12 +1218,17 @@ class MaglevTranslationArrayBuilder {
   void BuildDeoptStoreRegister(const compiler::AllocatedOperand& operand,
                                ValueRepresentation repr) {
     switch (repr) {
+      case ValueRepresentation::kWord64:
+        UNREACHABLE();
       case ValueRepresentation::kTagged:
         translation_array_builder_->StoreRegister(operand.GetRegister());
         break;
       case ValueRepresentation::kInt32:
         translation_array_builder_->StoreInt32Register(operand.GetRegister());
         break;
+      case ValueRepresentation::kUint32:
+        translation_array_builder_->StoreUint32Register(operand.GetRegister());
+        break;
       case ValueRepresentation::kFloat64:
         translation_array_builder_->StoreDoubleRegister(
             operand.GetDoubleRegister());
@@ -1232,12 +1240,17 @@ class MaglevTranslationArrayBuilder {
                                 ValueRepresentation repr) {
     int stack_slot = DeoptStackSlotFromStackSlot(operand);
     switch (repr) {
+      case ValueRepresentation::kWord64:
+        UNREACHABLE();
       case ValueRepresentation::kTagged:
         translation_array_builder_->StoreStackSlot(stack_slot);
         break;
       case ValueRepresentation::kInt32:
         translation_array_builder_->StoreInt32StackSlot(stack_slot);
         break;
+      case ValueRepresentation::kUint32:
+        translation_array_builder_->StoreUint32StackSlot(stack_slot);
+        break;
       case ValueRepresentation::kFloat64:
         translation_array_builder_->StoreDoubleStackSlot(stack_slot);
         break;
@@ -1273,7 +1286,7 @@ class MaglevTranslationArrayBuilder {
       translation_array_builder_->StoreStackSlot(closure_index);
     } else {
       translation_array_builder_->StoreLiteral(
-          GetDeoptLiteral(*compilation_unit.function().object()));
+          GetDeoptLiteral(compilation_unit.function()));
     }
 
     // TODO(leszeks): The input locations array happens to be in the same order
@@ -1346,6 +1359,10 @@ class MaglevTranslationArrayBuilder {
     return *res.entry;
   }
 
+  int GetDeoptLiteral(compiler::HeapObjectRef ref) {
+    return GetDeoptLiteral(*ref.object());
+  }
+
   LocalIsolate* local_isolate_;
   MaglevAssembler* masm_;
   TranslationArrayBuilder* translation_array_builder_;
@@ -1381,10 +1398,29 @@ void MaglevCodeGenerator::EmitCode() {
                                     MaglevCodeGeneratingNodeProcessor>>
       processor(SafepointingNodeProcessor{local_isolate_},
                 MaglevCodeGeneratingNodeProcessor{masm()});
+  RecordInlinedFunctions();
   processor.ProcessGraph(graph_);
   EmitDeferredCode();
   EmitDeopts();
+  if (code_gen_failed_) return;
   EmitExceptionHandlerTrampolines();
+  __ FinishCode();
+}
+
+void MaglevCodeGenerator::RecordInlinedFunctions() {
+  // The inlined functions should be the first literals.
+  DCHECK_EQ(0u, deopt_literals_.size());
+  for (OptimizedCompilationInfo::InlinedFunctionHolder& inlined :
+       graph_->inlined_functions()) {
+    IdentityMapFindResult<int> res =
+        deopt_literals_.FindOrInsert(inlined.shared_info);
+    if (!res.already_exists) {
+      DCHECK_EQ(0, *res.entry);
+      *res.entry = deopt_literals_.size() - 1;
+    }
+    inlined.RegisterInlinedFunctionId(*res.entry);
+  }
+  inlined_function_count_ = static_cast<int>(deopt_literals_.size());
 }
 
 void MaglevCodeGenerator::EmitDeferredCode() {
@@ -1401,9 +1437,28 @@ void MaglevCodeGenerator::EmitDeferredCode() {
 }
 
 void MaglevCodeGenerator::EmitDeopts() {
+  const size_t num_deopts = code_gen_state_.eager_deopts().size() +
+                            code_gen_state_.lazy_deopts().size();
+  if (num_deopts > Deoptimizer::kMaxNumberOfEntries) {
+    code_gen_failed_ = true;
+    return;
+  }
+
   MaglevTranslationArrayBuilder translation_builder(
       local_isolate_, &masm_, &translation_array_builder_, &deopt_literals_);
 
+  // Deoptimization exits must be as small as possible, since their count grows
+  // with function size. These labels are an optimization which extracts the
+  // (potentially large) instruction sequence for the final jump to the
+  // deoptimization entry into a single spot per InstructionStream object. All
+  // deopt exits can then near-call to this label. Note: not used on all
+  // architectures.
+  Label eager_deopt_entry;
+  Label lazy_deopt_entry;
+  __ MaybeEmitDeoptBuiltinsCall(
+      code_gen_state_.eager_deopts().size(), &eager_deopt_entry,
+      code_gen_state_.lazy_deopts().size(), &lazy_deopt_entry);
+
   deopt_exit_start_offset_ = __ pc_offset();
 
   int deopt_index = 0;
@@ -1413,15 +1468,23 @@ void MaglevCodeGenerator::EmitDeopts() {
     local_isolate_->heap()->Safepoint();
     translation_builder.BuildEagerDeopt(deopt_info);
 
-    if (masm_.compilation_info()->collect_source_positions()) {
+    if (masm_.compilation_info()->collect_source_positions() ||
+        IsDeoptimizationWithoutCodeInvalidation(deopt_info->reason())) {
+      // Note: Maglev uses the deopt_reason to tell the deoptimizer not to
+      // discard optimized code on deopt during ML-TF OSR. This is why we
+      // unconditionally emit the deopt_reason when
+      // IsDeoptimizationWithoutCodeInvalidation is true.
       __ RecordDeoptReason(deopt_info->reason(), 0,
                            GetSourcePosition(deopt_info->top_frame()),
                            deopt_index);
     }
     __ bind(deopt_info->deopt_entry_label());
+
     __ CallForDeoptimization(Builtin::kDeoptimizationEntry_Eager, deopt_index,
                              deopt_info->deopt_entry_label(),
-                             DeoptimizeKind::kEager, nullptr, nullptr);
+                             DeoptimizeKind::kEager, nullptr,
+                             &eager_deopt_entry);
+
     deopt_index++;
   }
 
@@ -1436,10 +1499,11 @@ void MaglevCodeGenerator::EmitDeopts() {
                            GetSourcePosition(deopt_info->top_frame()),
                            deopt_index);
     }
-    __ bind(deopt_info->deopt_entry_label());
+    __ BindExceptionHandler(deopt_info->deopt_entry_label());
+
     __ CallForDeoptimization(Builtin::kDeoptimizationEntry_Lazy, deopt_index,
                              deopt_info->deopt_entry_label(),
-                             DeoptimizeKind::kLazy, nullptr, nullptr);
+                             DeoptimizeKind::kLazy, nullptr, &lazy_deopt_entry);
 
     last_updated_safepoint = safepoint_table_builder_.UpdateDeoptimizationInfo(
         deopt_info->deopting_call_return_pc(),
@@ -1459,7 +1523,7 @@ void MaglevCodeGenerator::EmitExceptionHandlerTrampolines() {
 
 void MaglevCodeGenerator::EmitMetadata() {
   // Final alignment before starting on the metadata section.
-  masm()->Align(Code::kMetadataAlignment);
+  masm()->Align(InstructionStream::kMetadataAlignment);
 
   safepoint_table_builder_.Emit(masm());
 
@@ -1473,6 +1537,8 @@ void MaglevCodeGenerator::EmitMetadata() {
 }
 
 MaybeHandle<Code> MaglevCodeGenerator::BuildCodeObject(Isolate* isolate) {
+  if (code_gen_failed_) return {};
+
   CodeDesc desc;
   masm()->GetCode(isolate, &desc, &safepoint_table_builder_,
                   handler_table_offset_);
@@ -1501,10 +1567,8 @@ Handle<DeoptimizationData> MaglevCodeGenerator::GenerateDeoptimizationData(
     auto raw_data = *data;
 
     raw_data.SetTranslationByteArray(*translation_array);
-    // TODO(leszeks): Fix with the real inlined function count.
-    raw_data.SetInlinedFunctionCount(Smi::zero());
-    // TODO(leszeks): Support optimization IDs
-    raw_data.SetOptimizationId(Smi::zero());
+    raw_data.SetInlinedFunctionCount(Smi::FromInt(inlined_function_count_));
+    raw_data.SetOptimizationId(Smi::FromInt(isolate->NextOptimizationId()));
 
     DCHECK_NE(deopt_exit_start_offset_, -1);
     raw_data.SetDeoptExitStart(Smi::FromInt(deopt_exit_start_offset_));
@@ -1520,9 +1584,14 @@ Handle<DeoptimizationData> MaglevCodeGenerator::GenerateDeoptimizationData(
   Handle<DeoptimizationLiteralArray> literals =
       isolate->factory()->NewDeoptimizationLiteralArray(deopt_literals_.size() +
                                                         1);
-  // TODO(leszeks): Fix with the real inlining positions.
+  int inlined_functions_size =
+      static_cast<int>(graph_->inlined_functions().size());
   Handle<PodArray<InliningPosition>> inlining_positions =
-      PodArray<InliningPosition>::New(isolate, 0);
+      PodArray<InliningPosition>::New(isolate, inlined_functions_size);
+  for (int i = 0; i < inlined_functions_size; ++i) {
+    inlining_positions->set(i, graph_->inlined_functions()[i].position);
+  }
+
   DisallowGarbageCollection no_gc;
 
   auto raw_literals = *literals;
@@ -1539,8 +1608,6 @@ Handle<DeoptimizationData> MaglevCodeGenerator::GenerateDeoptimizationData(
                                                 ->bytecode()
                                                 .object());
   raw_data.SetLiteralArray(raw_literals);
-
-  // TODO(leszeks): Fix with the real inlining positions.
   raw_data.SetInliningPositions(*inlining_positions);
 
   // TODO(leszeks): Fix once we have OSR.