1 files changed, 236 insertions, 152 deletions
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
index 4c5accd7a8..43c94b3e9d 100644
--- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
+++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc
@@ -29,7 +29,7 @@ namespace v8 {
 namespace internal {
 namespace compiler {
 
-#define __ tasm()->
+#define __ masm()->
 
 // Adds Arm-specific methods to convert InstructionOperands.
 class ArmOperandConverter final : public InstructionOperandConverter {
@@ -193,9 +193,8 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
   }
 
   void Generate() final {
-    __ CheckPageFlag(
-        value_, MemoryChunk::kPointersToHereAreInterestingOrInSharedHeapMask,
-        eq, exit());
+    __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq,
+                     exit());
     SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                             ? SaveFPRegsMode::kSave
                                             : SaveFPRegsMode::kIgnore;
@@ -415,7 +414,7 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
   do {                                                                         \
     /* TODO(bmeurer): We should really get rid of this special instruction, */ \
     /* and generate a CallAddress instruction instead. */                      \
-    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    FrameScope scope(masm(), StackFrame::MANUAL);                              \
     __ PrepareCallCFunction(0, 2);                                             \
     __ MovToFloatParameters(i.InputDoubleRegister(0),                          \
                             i.InputDoubleRegister(1));                         \
@@ -429,7 +428,7 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
   do {                                                                         \
     /* TODO(bmeurer): We should really get rid of this special instruction, */ \
     /* and generate a CallAddress instruction instead. */                      \
-    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    FrameScope scope(masm(), StackFrame::MANUAL);                              \
     __ PrepareCallCFunction(0, 1);                                             \
     __ MovToFloatParameter(i.InputDoubleRegister(0));                          \
     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1);    \
@@ -473,7 +472,7 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
     if (instr->InputAt(1)->IsImmediate()) {              \
       __ asm_imm(dt, dst, src, i.InputInt##width(1));    \
     } else {                                             \
-      UseScratchRegisterScope temps(tasm());             \
+      UseScratchRegisterScope temps(masm());             \
       Simd128Register tmp = temps.AcquireQ();            \
       Register shift = temps.Acquire();                  \
       constexpr int mask = (1 << width) - 1;             \
@@ -493,7 +492,7 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
     if (instr->InputAt(1)->IsImmediate()) {               \
       __ asm_imm(dt, dst, src, i.InputInt##width(1));     \
     } else {                                              \
-      UseScratchRegisterScope temps(tasm());              \
+      UseScratchRegisterScope temps(masm());              \
       Simd128Register tmp = temps.AcquireQ();             \
       Register shift = temps.Acquire();                   \
       constexpr int mask = (1 << width) - 1;              \
@@ -518,20 +517,20 @@ void CodeGenerator::AssemblePrepareTailCall() {
 
 namespace {
 
-void FlushPendingPushRegisters(TurboAssembler* tasm,
+void FlushPendingPushRegisters(MacroAssembler* masm,
                                FrameAccessState* frame_access_state,
                                ZoneVector<Register>* pending_pushes) {
   switch (pending_pushes->size()) {
     case 0:
       break;
     case 1:
-      tasm->push((*pending_pushes)[0]);
+      masm->push((*pending_pushes)[0]);
       break;
     case 2:
-      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
+      masm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
       break;
     case 3:
-      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
+      masm->Push((*pending_pushes)[0], (*pending_pushes)[1],
                  (*pending_pushes)[2]);
       break;
     default:
@@ -542,7 +541,7 @@ void FlushPendingPushRegisters(TurboAssembler* tasm,
 }
 
 void AdjustStackPointerForTailCall(
-    TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
+    MacroAssembler* masm, FrameAccessState* state, int new_slot_above_sp,
     ZoneVector<Register>* pending_pushes = nullptr,
     bool allow_shrinkage = true) {
   int current_sp_offset = state->GetSPToFPSlotCount() +
@@ -550,15 +549,15 @@ void AdjustStackPointerForTailCall(
   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
   if (stack_slot_delta > 0) {
     if (pending_pushes != nullptr) {
-      FlushPendingPushRegisters(tasm, state, pending_pushes);
+      FlushPendingPushRegisters(masm, state, pending_pushes);
     }
-    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
+    masm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
     state->IncreaseSPDelta(stack_slot_delta);
   } else if (allow_shrinkage && stack_slot_delta < 0) {
     if (pending_pushes != nullptr) {
-      FlushPendingPushRegisters(tasm, state, pending_pushes);
+      FlushPendingPushRegisters(masm, state, pending_pushes);
     }
-    tasm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
+    masm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
     state->IncreaseSPDelta(stack_slot_delta);
   }
 }
@@ -601,7 +600,7 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
           LocationOperand::cast(move->destination()));
       InstructionOperand source(move->source());
       AdjustStackPointerForTailCall(
-          tasm(), frame_access_state(),
+          masm(), frame_access_state(),
           destination_location.index() - pending_pushes.size(),
           &pending_pushes);
       // Pushes of non-register data types are not supported.
@@ -611,26 +610,26 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
       // TODO(arm): We can push more than 3 registers at once. Add support in
       // the macro-assembler for pushing a list of registers.
       if (pending_pushes.size() == 3) {
-        FlushPendingPushRegisters(tasm(), frame_access_state(),
+        FlushPendingPushRegisters(masm(), frame_access_state(),
                                   &pending_pushes);
       }
       move->Eliminate();
     }
-    FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
+    FlushPendingPushRegisters(masm(), frame_access_state(), &pending_pushes);
   }
-  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                 first_unused_slot_offset, nullptr, false);
 }
 
 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                              int first_unused_slot_offset) {
-  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+  AdjustStackPointerForTailCall(masm(), frame_access_state(),
                                 first_unused_slot_offset);
 }
 
 // Check that {kJavaScriptCallCodeStartRegister} is correct.
 void CodeGenerator::AssembleCodeStartRegisterCheck() {
-  UseScratchRegisterScope temps(tasm());
+  UseScratchRegisterScope temps(masm());
   Register scratch = temps.Acquire();
   __ ComputeCodeStartAddress(scratch);
   __ cmp(scratch, kJavaScriptCallCodeStartRegister);
@@ -641,16 +640,15 @@ void CodeGenerator::AssembleCodeStartRegisterCheck() {
 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
 // to:
 //    1. read from memory the word that contains that bit, which can be found in
-//       the flags in the referenced {CodeDataContainer} object;
+//       the flags in the referenced {Code} object;
 //    2. test kMarkedForDeoptimizationBit in those flags; and
 //    3. if it is not zero then it jumps to the builtin.
 void CodeGenerator::BailoutIfDeoptimized() {
-  UseScratchRegisterScope temps(tasm());
+  UseScratchRegisterScope temps(masm());
   Register scratch = temps.Acquire();
-  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  int offset = InstructionStream::kCodeOffset - InstructionStream::kHeaderSize;
   __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
-  __ ldr(scratch,
-         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
+  __ ldr(scratch, FieldMemOperand(scratch, Code::kKindSpecificFlagsOffset));
   __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
           RelocInfo::CODE_TARGET, ne);
@@ -748,7 +746,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArchCallJSFunction: {
       Register func = i.InputRegister(0);
       if (v8_flags.debug_code) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         Register scratch = temps.Acquire();
         // Check the function's context matches the context argument.
         __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
@@ -859,7 +857,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       {
         // We don't actually want to generate a pile of code for this, so just
         // claim there is a stack frame, without generating one.
-        FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
+        FrameScope scope(masm(), StackFrame::NO_FRAME_TYPE);
         __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
                 RelocInfo::CODE_TARGET);
       }
@@ -931,7 +929,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArchAtomicStoreWithWriteBarrier: {
       RecordWriteMode mode;
       if (arch_opcode == kArchStoreWithWriteBarrier) {
-        mode = static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+        mode = RecordWriteModeField::decode(instr->opcode());
       } else {
         mode = AtomicStoreRecordWriteModeField::decode(instr->opcode());
       }
@@ -1070,7 +1068,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
              i.InputRegister(2), i.OutputSBit());
       break;
     case kArmMls: {
-      CpuFeatureScope scope(tasm(), ARMv7);
+      CpuFeatureScope scope(masm(), ARMv7);
       __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
              i.InputRegister(2));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
@@ -1094,13 +1092,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                i.InputRegister(1), i.OutputSBit());
       break;
     case kArmSdiv: {
-      CpuFeatureScope scope(tasm(), SUDIV);
+      CpuFeatureScope scope(masm(), SUDIV);
       __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
     }
     case kArmUdiv: {
-      CpuFeatureScope scope(tasm(), SUDIV);
+      CpuFeatureScope scope(masm(), SUDIV);
       __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
@@ -1128,20 +1126,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
              i.OutputSBit());
       break;
     case kArmBfc: {
-      CpuFeatureScope scope(tasm(), ARMv7);
+      CpuFeatureScope scope(masm(), ARMv7);
       __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
     }
     case kArmUbfx: {
-      CpuFeatureScope scope(tasm(), ARMv7);
+      CpuFeatureScope scope(masm(), ARMv7);
       __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
               i.InputInt8(2));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
     }
     case kArmSbfx: {
-      CpuFeatureScope scope(tasm(), ARMv7);
+      CpuFeatureScope scope(masm(), ARMv7);
       __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
               i.InputInt8(2));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
@@ -1184,7 +1182,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
     case kArmRbit: {
-      CpuFeatureScope scope(tasm(), ARMv7);
+      CpuFeatureScope scope(masm(), ARMv7);
       __ rbit(i.OutputRegister(), i.InputRegister(0));
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
@@ -1379,7 +1377,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmVmodF64: {
       // TODO(bmeurer): We should really get rid of this special instruction,
       // and generate a CallAddress instruction instead.
-      FrameScope scope(tasm(), StackFrame::MANUAL);
+      FrameScope scope(masm(), StackFrame::MANUAL);
       __ PrepareCallCFunction(0, 2);
       __ MovToFloatParameters(i.InputDoubleRegister(0),
                               i.InputDoubleRegister(1));
@@ -1399,7 +1397,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     case kArmVrintmF32: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       if (instr->InputAt(0)->IsSimd128Register()) {
         __ vrintm(NeonS32, i.OutputSimd128Register(),
                   i.InputSimd128Register(0));
@@ -1409,12 +1407,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVrintmF64: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
     case kArmVrintpF32: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       if (instr->InputAt(0)->IsSimd128Register()) {
         __ vrintp(NeonS32, i.OutputSimd128Register(),
                   i.InputSimd128Register(0));
@@ -1424,12 +1422,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVrintpF64: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
     case kArmVrintzF32: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       if (instr->InputAt(0)->IsSimd128Register()) {
         __ vrintz(NeonS32, i.OutputSimd128Register(),
                   i.InputSimd128Register(0));
@@ -1439,17 +1437,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVrintzF64: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
     case kArmVrintaF64: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
     case kArmVrintnF32: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       if (instr->InputAt(0)->IsSimd128Register()) {
         __ vrintn(NeonS32, i.OutputSimd128Register(),
                   i.InputSimd128Register(0));
@@ -1459,7 +1457,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVrintnF64: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
@@ -1474,7 +1472,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtF32S32: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vmov(scratch, i.InputRegister(0));
       __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
@@ -1482,7 +1480,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtF32U32: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vmov(scratch, i.InputRegister(0));
       __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
@@ -1490,7 +1488,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtF64S32: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vmov(scratch, i.InputRegister(0));
       __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
@@ -1498,7 +1496,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtF64U32: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vmov(scratch, i.InputRegister(0));
       __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
@@ -1506,7 +1504,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtS32F32: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
       __ vmov(i.OutputRegister(), scratch);
@@ -1521,7 +1519,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtU32F32: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
       __ vmov(i.OutputRegister(), scratch);
@@ -1536,7 +1534,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtS32F64: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
       __ vmov(i.OutputRegister(), scratch);
@@ -1544,7 +1542,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmVcvtU32F64: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       SwVfpRegister scratch = temps.AcquireS();
       __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
       __ vmov(i.OutputRegister(), scratch);
@@ -1763,7 +1761,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
           __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
         } else {
           DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
-          UseScratchRegisterScope temps(tasm());
+          UseScratchRegisterScope temps(masm());
           Register scratch = temps.Acquire();
           __ add(scratch, fp, Operand(offset));
           __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
@@ -1900,7 +1898,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
 #undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
     case kArmF64x2Eq: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Register scratch = temps.Acquire();
       __ mov(scratch, Operand(0));
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
@@ -1916,7 +1914,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2Ne: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Register scratch = temps.Acquire();
       __ mov(scratch, Operand(0));
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
@@ -1932,7 +1930,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2Lt: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Register scratch = temps.Acquire();
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                                i.InputSimd128Register(1).low());
@@ -1948,7 +1946,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2Le: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Register scratch = temps.Acquire();
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                                i.InputSimd128Register(1).low());
@@ -1989,8 +1987,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vmov(dst.high(), rhs.high(), gt);
       break;
     }
+    case kArmF64x2Qfma: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      __ vmul(dst.low(), src0.low(), src1.low());
+      __ vmul(dst.high(), src0.high(), src1.high());
+      __ vadd(dst.low(), src2.low(), dst.low());
+      __ vadd(dst.high(), src2.high(), dst.high());
+      break;
+    }
+    case kArmF64x2Qfms: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      __ vmul(dst.low(), src0.low(), src1.low());
+      __ vmul(dst.high(), src0.high(), src1.high());
+      __ vsub(dst.low(), src2.low(), dst.low());
+      __ vsub(dst.high(), src2.high(), dst.high());
+      break;
+    }
     case kArmF64x2Ceil: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register src = i.InputSimd128Register(0);
       __ vrintp(dst.low(), src.low());
@@ -1998,7 +2018,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2Floor: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register src = i.InputSimd128Register(0);
       __ vrintm(dst.low(), src.low());
@@ -2006,7 +2026,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2Trunc: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register src = i.InputSimd128Register(0);
       __ vrintz(dst.low(), src.low());
@@ -2014,7 +2034,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmF64x2NearestInt: {
-      CpuFeatureScope scope(tasm(), ARMv8);
+      CpuFeatureScope scope(masm(), ARMv8);
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register src = i.InputSimd128Register(0);
       __ vrintn(dst.low(), src.low());
@@ -2061,7 +2081,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmI64x2Mul: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       QwNeonRegister dst = i.OutputSimd128Register();
       QwNeonRegister left = i.InputSimd128Register(0);
       QwNeonRegister right = i.InputSimd128Register(1);
@@ -2289,6 +2309,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vbsl(dst, rhs, lhs);
       break;
     }
+    case kArmF32x4Qfma: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vmul(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ vadd(dst, i.InputSimd128Register(2), dst);
+      break;
+    }
+    case kArmF32x4Qfms: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vmul(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ vsub(dst, i.InputSimd128Register(2), dst);
+      break;
+    }
     case kArmF32x4DemoteF64x2Zero: {
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register src = i.InputSimd128Register(0);
@@ -2448,7 +2480,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmI32x4BitMask: {
       Register dst = i.OutputRegister();
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register src = i.InputSimd128Register(0);
       Simd128Register tmp = temps.AcquireQ();
       Simd128Register mask = i.TempSimd128Register(0);
@@ -2468,15 +2500,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register lhs = i.InputSimd128Register(0);
       Simd128Register rhs = i.InputSimd128Register(1);
-      Simd128Register tmp1 = i.TempSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
-      __ vmull(NeonS16, tmp1, lhs.low(), rhs.low());
+      __ vmull(NeonS16, scratch, lhs.low(), rhs.low());
+      __ vpadd(Neon32, dst.low(), scratch.low(), scratch.high());
       __ vmull(NeonS16, scratch, lhs.high(), rhs.high());
-      __ vpadd(Neon32, dst.low(), tmp1.low(), tmp1.high());
       __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
       break;
     }
+    case kArmI16x8DotI8x16S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(masm());
+      Simd128Register scratch = temps.AcquireQ();
+      __ vmull(NeonS8, scratch, lhs.low(), rhs.low());
+      __ vpadd(Neon16, dst.low(), scratch.low(), scratch.high());
+      __ vmull(NeonS8, scratch, lhs.high(), rhs.high());
+      __ vpadd(Neon16, dst.high(), scratch.low(), scratch.high());
+      break;
+    }
+    case kArmI32x4DotI8x16AddS: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      Simd128Register tmp1 = i.TempSimd128Register(0);
+      DCHECK_EQ(dst, i.InputSimd128Register(2));
+      UseScratchRegisterScope temps(masm());
+      Simd128Register scratch = temps.AcquireQ();
+      __ vmull(NeonS8, scratch, lhs.low(), rhs.low());
+      __ vpadd(Neon16, tmp1.low(), scratch.low(), scratch.high());
+      __ vmull(NeonS8, scratch, lhs.high(), rhs.high());
+      __ vpadd(Neon16, tmp1.high(), scratch.low(), scratch.high());
+      __ vpadal(NeonS16, dst, tmp1);
+      break;
+    }
     case kArmI32x4TruncSatF64x2SZero: {
       Simd128Register dst = i.OutputSimd128Register();
       Simd128Register src = i.InputSimd128Register(0);
@@ -2651,7 +2709,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmI16x8BitMask: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Register dst = i.OutputRegister();
       Simd128Register src = i.InputSimd128Register(0);
       Simd128Register tmp = temps.AcquireQ();
@@ -2806,7 +2864,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmI8x16BitMask: {
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Register dst = i.OutputRegister();
       Simd128Register src = i.InputSimd128Register(0);
       Simd128Register tmp = temps.AcquireQ();
@@ -2907,7 +2965,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
       DCHECK(dst == i.InputSimd128Register(0));
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
       __ vmov(scratch, src1);
@@ -2918,7 +2976,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
       DCHECK(dst == i.InputSimd128Register(0));
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
       __ vmov(scratch, src1);
@@ -2929,7 +2987,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
       DCHECK(dst == i.InputSimd128Register(0));
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
       __ vmov(scratch, src1);
@@ -2962,7 +3020,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS32x4TransposeRight: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
@@ -2991,7 +3049,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS16x8UnzipLeft: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
@@ -3002,7 +3060,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS16x8UnzipRight: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
@@ -3013,7 +3071,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS16x8TransposeLeft: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
@@ -3024,7 +3082,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS16x8TransposeRight: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
@@ -3053,7 +3111,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS8x16UnzipLeft: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
@@ -3064,7 +3122,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS8x16UnzipRight: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
@@ -3075,7 +3133,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS8x16TransposeLeft: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
@@ -3086,7 +3144,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmS8x16TransposeRight: {
       Simd128Register dst = i.OutputSimd128Register(),
                       src1 = i.InputSimd128Register(1);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       DCHECK(dst == i.InputSimd128Register(0));
       // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
@@ -3113,7 +3171,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
                       src0 = i.InputSimd128Register(0),
                       src1 = i.InputSimd128Register(1);
       DwVfpRegister table_base = src0.low();
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       Simd128Register scratch = temps.AcquireQ();
       // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
       // src1. They must be consecutive.
@@ -3164,7 +3222,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmV128AnyTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       DwVfpRegister scratch = temps.AcquireD();
       __ vpmax(NeonU32, scratch, src.low(), src.high());
       __ vpmax(NeonU32, scratch, scratch, scratch);
@@ -3179,7 +3237,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmI32x4AllTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       DwVfpRegister scratch = temps.AcquireD();
       __ vpmin(NeonU32, scratch, src.low(), src.high());
       __ vpmin(NeonU32, scratch, scratch, scratch);
@@ -3190,7 +3248,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmI16x8AllTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       DwVfpRegister scratch = temps.AcquireD();
       __ vpmin(NeonU16, scratch, src.low(), src.high());
       __ vpmin(NeonU16, scratch, scratch, scratch);
@@ -3202,7 +3260,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmI8x16AllTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       DwVfpRegister scratch = temps.AcquireD();
       __ vpmin(NeonU8, scratch, src.low(), src.high());
       __ vpmin(NeonU8, scratch, scratch, scratch);
@@ -3699,6 +3757,10 @@ void CodeGenerator::AssembleConstructFrame() {
       if (call_descriptor->IsWasmFunctionCall() ||
           call_descriptor->IsWasmImportWrapper() ||
           call_descriptor->IsWasmCapiFunction()) {
+        // For import wrappers and C-API functions, this stack slot is only used
+        // for printing stack traces in V8. Also, it holds a WasmApiFunctionRef
+        // instead of the instance itself, which is taken care of in the frames
+        // accessors.
         __ Push(kWasmInstanceRegister);
       }
       if (call_descriptor->IsWasmCapiFunction()) {
@@ -3744,7 +3806,7 @@ void CodeGenerator::AssembleConstructFrame() {
       // exception unconditionally. Thereby we can avoid the integer overflow
       // check in the condition code.
       if (required_slots * kSystemPointerSize < v8_flags.stack_size * KB) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         Register scratch = temps.Acquire();
         __ ldr(scratch, FieldMemOperand(
                             kWasmInstanceRegister,
@@ -3870,8 +3932,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
       __ cmp(argc_reg, Operand(parameter_slots));
       __ mov(argc_reg, Operand(parameter_slots), LeaveCC, lt);
     }
-    __ DropArguments(argc_reg, TurboAssembler::kCountIsInteger,
-                     TurboAssembler::kCountIncludesReceiver);
+    __ DropArguments(argc_reg, MacroAssembler::kCountIsInteger,
+                     MacroAssembler::kCountIncludesReceiver);
   } else if (additional_pop_count->IsImmediate()) {
     DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
     int additional_count = g.ToConstant(additional_pop_count).ToInt32();
@@ -3941,7 +4003,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
       } else if (source->IsDoubleRegister()) {
         __ vstr(g.ToDoubleRegister(source), dst);
       } else {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         Register temp = temps.Acquire();
         QwNeonRegister src = g.ToSimd128Register(source);
         __ add(temp, dst.rn(), Operand(dst.offset()));
@@ -3962,7 +4024,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
       } else if (source->IsDoubleStackSlot()) {
         __ vldr(g.ToDoubleRegister(destination), src);
       } else {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         Register temp = temps.Acquire();
         QwNeonRegister dst = g.ToSimd128Register(destination);
         __ add(temp, src.rn(), Operand(src.offset()));
@@ -3973,7 +4035,7 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
     case MoveType::kStackToStack: {
       MemOperand src = g.ToMemOperand(source);
       MemOperand dst = g.ToMemOperand(destination);
-      UseScratchRegisterScope temps(tasm());
+      UseScratchRegisterScope temps(masm());
       if (source->IsStackSlot() || source->IsFloatStackSlot()) {
         SwVfpRegister temp = temps.AcquireS();
         __ vldr(temp, src);
@@ -4011,27 +4073,27 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
       Constant src = g.ToConstant(source);
       MemOperand dst = g.ToMemOperand(destination);
       if (destination->IsStackSlot()) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         // Acquire a S register instead of a general purpose register in case
         // `vstr` needs one to compute the address of `dst`.
         SwVfpRegister s_temp = temps.AcquireS();
         {
           // TODO(arm): This sequence could be optimized further if necessary by
           // writing the constant directly into `s_temp`.
-          UseScratchRegisterScope temps(tasm());
+          UseScratchRegisterScope temps(masm());
           Register temp = temps.Acquire();
           MoveConstantToRegister(temp, src);
           __ vmov(s_temp, temp);
         }
         __ vstr(s_temp, dst);
       } else if (destination->IsFloatStackSlot()) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         SwVfpRegister temp = temps.AcquireS();
         __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
         __ vstr(temp, dst);
       } else {
         DCHECK(destination->IsDoubleStackSlot());
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         DwVfpRegister temp = temps.AcquireD();
         // TODO(arm): Look into optimizing this further if possible. Supporting
         // the NEON version of VMOV may help.
@@ -4044,10 +4106,70 @@ void CodeGenerator::AssembleMove(InstructionOperand* source,
   UNREACHABLE();
 }
 
-void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
-  // Must be kept in sync with {MoveTempLocationTo}.
+AllocatedOperand CodeGenerator::Push(InstructionOperand* source) {
   auto rep = LocationOperand::cast(source)->representation();
-  move_cycle_.temps.emplace(tasm());
+  int new_slots = ElementSizeInPointers(rep);
+  ArmOperandConverter g(this, nullptr);
+  int last_frame_slot_id =
+      frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
+  int sp_delta = frame_access_state_->sp_delta();
+  int slot_id = last_frame_slot_id + sp_delta + new_slots;
+  AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
+  if (source->IsRegister()) {
+    __ push(g.ToRegister(source));
+    frame_access_state()->IncreaseSPDelta(new_slots);
+  } else if (source->IsStackSlot()) {
+    UseScratchRegisterScope temps(masm());
+    Register scratch = temps.Acquire();
+    __ ldr(scratch, g.ToMemOperand(source));
+    __ push(scratch);
+    frame_access_state()->IncreaseSPDelta(new_slots);
+  } else {
+    // No push instruction for this operand type. Bump the stack pointer and
+    // assemble the move.
+    __ sub(sp, sp, Operand(new_slots * kSystemPointerSize));
+    frame_access_state()->IncreaseSPDelta(new_slots);
+    AssembleMove(source, &stack_slot);
+  }
+  temp_slots_ += new_slots;
+  return stack_slot;
+}
+
+void CodeGenerator::Pop(InstructionOperand* dest, MachineRepresentation rep) {
+  int new_slots = ElementSizeInPointers(rep);
+  frame_access_state()->IncreaseSPDelta(-new_slots);
+  ArmOperandConverter g(this, nullptr);
+  if (dest->IsRegister()) {
+    __ pop(g.ToRegister(dest));
+  } else if (dest->IsStackSlot()) {
+    UseScratchRegisterScope temps(masm());
+    Register scratch = temps.Acquire();
+    __ pop(scratch);
+    __ str(scratch, g.ToMemOperand(dest));
+  } else {
+    int last_frame_slot_id =
+        frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
+    int sp_delta = frame_access_state_->sp_delta();
+    int slot_id = last_frame_slot_id + sp_delta + new_slots;
+    AllocatedOperand stack_slot(LocationOperand::STACK_SLOT, rep, slot_id);
+    AssembleMove(&stack_slot, dest);
+    __ add(sp, sp, Operand(new_slots * kSystemPointerSize));
+  }
+  temp_slots_ -= new_slots;
+}
+
+void CodeGenerator::PopTempStackSlots() {
+  if (temp_slots_ > 0) {
+    frame_access_state()->IncreaseSPDelta(-temp_slots_);
+    __ add(sp, sp, Operand(temp_slots_ * kSystemPointerSize));
+    temp_slots_ = 0;
+  }
+}
+
+void CodeGenerator::MoveToTempLocation(InstructionOperand* source,
+                                       MachineRepresentation rep) {
+  // Must be kept in sync with {MoveTempLocationTo}.
+  move_cycle_.temps.emplace(masm());
   auto& temps = *move_cycle_.temps;
   // Temporarily exclude the reserved scratch registers while we pick a
   // location to resolve the cycle. Re-include them immediately afterwards so
@@ -4083,27 +4205,7 @@ void CodeGenerator::MoveToTempLocation(InstructionOperand* source) {
   } else {
     // The scratch registers are blocked by pending moves. Use the stack
     // instead.
-    int new_slots = ElementSizeInPointers(rep);
-    ArmOperandConverter g(this, nullptr);
-    if (source->IsRegister()) {
-      __ push(g.ToRegister(source));
-    } else if (source->IsStackSlot()) {
-      UseScratchRegisterScope temps2(tasm());
-      Register scratch = temps2.Acquire();
-      __ ldr(scratch, g.ToMemOperand(source));
-      __ push(scratch);
-    } else {
-      // No push instruction for this operand type. Bump the stack pointer and
-      // assemble the move.
-      int last_frame_slot_id =
-          frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
-      int sp_delta = frame_access_state_->sp_delta();
-      int temp_slot = last_frame_slot_id + sp_delta + new_slots;
-      __ sub(sp, sp, Operand(new_slots * kSystemPointerSize));
-      AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
-      AssembleMove(source, &temp);
-    }
-    frame_access_state()->IncreaseSPDelta(new_slots);
+    Push(source);
   }
 }
 
@@ -4129,25 +4231,7 @@ void CodeGenerator::MoveTempLocationTo(InstructionOperand* dest,
       }
     }
   } else {
-    int new_slots = ElementSizeInPointers(rep);
-    frame_access_state()->IncreaseSPDelta(-new_slots);
-    ArmOperandConverter g(this, nullptr);
-    if (dest->IsRegister()) {
-      __ pop(g.ToRegister(dest));
-    } else if (dest->IsStackSlot()) {
-      UseScratchRegisterScope temps(tasm());
-      Register scratch = temps.Acquire();
-      __ pop(scratch);
-      __ str(scratch, g.ToMemOperand(dest));
-    } else {
-      int last_frame_slot_id =
-          frame_access_state_->frame()->GetTotalFrameSlotCount() - 1;
-      int sp_delta = frame_access_state_->sp_delta();
-      int temp_slot = last_frame_slot_id + sp_delta + new_slots;
-      AllocatedOperand temp(LocationOperand::STACK_SLOT, rep, temp_slot);
-      AssembleMove(&temp, dest);
-      __ add(sp, sp, Operand(new_slots * kSystemPointerSize));
-    }
+    Pop(dest, rep);
   }
   // Restore the default state to release the {UseScratchRegisterScope} and to
   // prepare for the next cycle.
@@ -4159,7 +4243,7 @@ void CodeGenerator::SetPendingMove(MoveOperands* move) {
   InstructionOperand& destination = move->destination();
   MoveType::Type move_type =
       MoveType::InferMove(&move->source(), &move->destination());
-  UseScratchRegisterScope temps(tasm());
+  UseScratchRegisterScope temps(masm());
   if (move_type == MoveType::kStackToStack) {
     if (source.IsStackSlot() || source.IsFloatStackSlot()) {
       SwVfpRegister temp = temps.AcquireS();
@@ -4199,7 +4283,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
         DCHECK(destination->IsFloatRegister());
         // GapResolver may give us reg codes that don't map to actual
         // s-registers. Generate code to work around those cases.
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         LowDwVfpRegister temp = temps.AcquireLowD();
         int src_code = LocationOperand::cast(source)->register_code();
         int dst_code = LocationOperand::cast(destination)->register_code();
@@ -4216,20 +4300,20 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
       MemOperand dst = g.ToMemOperand(destination);
       if (source->IsRegister()) {
         Register src = g.ToRegister(source);
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         SwVfpRegister temp = temps.AcquireS();
         __ vmov(temp, src);
         __ ldr(src, dst);
         __ vstr(temp, dst);
       } else if (source->IsFloatRegister()) {
         int src_code = LocationOperand::cast(source)->register_code();
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         LowDwVfpRegister temp = temps.AcquireLowD();
         __ VmovExtended(temp.low().code(), src_code);
         __ VmovExtended(src_code, dst);
         __ vstr(temp.low(), dst);
       } else if (source->IsDoubleRegister()) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         DwVfpRegister temp = temps.AcquireD();
         DwVfpRegister src = g.ToDoubleRegister(source);
         __ Move(temp, src);
@@ -4237,7 +4321,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
         __ vstr(temp, dst);
       } else {
         QwNeonRegister src = g.ToSimd128Register(source);
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         Register temp = temps.Acquire();
         QwNeonRegister temp_q = temps.AcquireQ();
         __ Move(temp_q, src);
@@ -4251,7 +4335,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
       MemOperand src = g.ToMemOperand(source);
       MemOperand dst = g.ToMemOperand(destination);
       if (source->IsStackSlot() || source->IsFloatStackSlot()) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         SwVfpRegister temp_0 = temps.AcquireS();
         SwVfpRegister temp_1 = temps.AcquireS();
         __ vldr(temp_0, dst);
@@ -4259,7 +4343,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
         __ vstr(temp_0, src);
         __ vstr(temp_1, dst);
       } else if (source->IsDoubleStackSlot()) {
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         LowDwVfpRegister temp = temps.AcquireLowD();
         if (temps.CanAcquireD()) {
           DwVfpRegister temp_0 = temp;
@@ -4292,7 +4376,7 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source,
         MemOperand dst0 = dst;
         MemOperand src1(src.rn(), src.offset() + kDoubleSize);
         MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
-        UseScratchRegisterScope temps(tasm());
+        UseScratchRegisterScope temps(masm());
         DwVfpRegister temp_0 = temps.AcquireD();
         DwVfpRegister temp_1 = temps.AcquireD();
         __ vldr(temp_0, dst0);