Diffstat (limited to 'deps/v8/src/compiler/backend')
43 files changed, 1152 insertions, 1523 deletions
diff --git a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc index b01297e03a..2c7e856239 100644 --- a/deps/v8/src/compiler/backend/arm/code-generator-arm.cc +++ b/deps/v8/src/compiler/backend/arm/code-generator-arm.cc @@ -2026,7 +2026,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Simd128Register rhs = i.InputSimd128Register(1); DCHECK_EQ(dst, lhs); - // Move rhs only when rhs is strictly greater (mi). + // Move rhs only when rhs is strictly lesser (mi). __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); __ vmov(dst.low(), rhs.low(), mi); __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); @@ -2039,7 +2039,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Simd128Register rhs = i.InputSimd128Register(1); DCHECK_EQ(dst, lhs); - // Move rhs only when rhs is strictly greater (mi). + // Move rhs only when rhs is strictly greater (gt). __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); __ vmov(dst.low(), rhs.low(), gt); __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); @@ -2150,7 +2150,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmI64x2Neg: { Simd128Register dst = i.OutputSimd128Register(); __ vmov(dst, uint64_t{0}); - __ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0)); + __ vsub(Neon64, dst, dst, i.InputSimd128Register(0)); break; } case kArmI64x2Shl: { @@ -3097,7 +3097,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1), i.InputInt4(2)); break; } - case kArmS8x16Swizzle: { + case kArmI8x16Swizzle: { Simd128Register dst = i.OutputSimd128Register(), tbl = i.InputSimd128Register(0), src = i.InputSimd128Register(1); @@ -3106,7 +3106,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vtbl(dst.high(), table, src.high()); break; } - case kArmS8x16Shuffle: { + case kArmI8x16Shuffle: { Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), src1 = i.InputSimd128Register(1); @@ -3648,9 +3648,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (call_descriptor->PushArgumentCount()) { - __ Push(kJavaScriptCallArgCountRegister); - } } else { __ StubPrologue(info()->GetOutputStackFrameType()); if (call_descriptor->IsWasmFunctionCall()) { diff --git a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h index 64707cb612..b3ee561e27 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h +++ b/deps/v8/src/compiler/backend/arm/instruction-codes-arm.h @@ -307,8 +307,8 @@ namespace compiler { V(ArmS8x16TransposeLeft) \ V(ArmS8x16TransposeRight) \ V(ArmS8x16Concat) \ - V(ArmS8x16Swizzle) \ - V(ArmS8x16Shuffle) \ + V(ArmI8x16Swizzle) \ + V(ArmI8x16Shuffle) \ V(ArmS32x2Reverse) \ V(ArmS16x4Reverse) \ V(ArmS16x2Reverse) \ diff --git a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc index aa9fa9e17b..6459d22a11 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc @@ -287,8 +287,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmS8x16TransposeLeft: case kArmS8x16TransposeRight: case kArmS8x16Concat: - case kArmS8x16Swizzle: - case kArmS8x16Shuffle: + case kArmI8x16Swizzle: + case kArmI8x16Shuffle: case kArmS32x2Reverse: case kArmS16x4Reverse: 
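Reviewer note on the kArmI64x2Neg change above: vqsub is a saturating subtract, but wasm i64x2.neg must wrap modulo 2^64. A minimal scalar sketch of the difference per lane (the helper names are illustrative, not V8 code):

#include <cassert>
#include <cstdint>
#include <limits>

// Wrapping negation, as wasm i64x2.neg requires: 0 - x modulo 2^64.
// This models the per-lane effect of the new `vsub Neon64` sequence.
int64_t WrappingNeg(int64_t x) {
  return static_cast<int64_t>(0ull - static_cast<uint64_t>(x));
}

// Signed saturating subtraction from zero, the per-lane effect of the
// old `vqsub NeonS64`: 0 - INT64_MIN overflows, so it clamps.
int64_t SaturatingSubFromZero(int64_t x) {
  if (x == std::numeric_limits<int64_t>::min())
    return std::numeric_limits<int64_t>::max();
  return -x;
}

int main() {
  int64_t min = std::numeric_limits<int64_t>::min();
  assert(WrappingNeg(min) == min);            // required wasm result
  assert(SaturatingSubFromZero(min) != min);  // the old, wrong result
}

INT64_MIN is the only lane value on which the two instructions disagree, which is exactly the case the switch to vsub fixes.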
case kArmS16x2Reverse: diff --git a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc index c53c8f372e..e868a1a47a 100644 --- a/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc +++ b/deps/v8/src/compiler/backend/arm/instruction-selector-arm.cc @@ -2870,7 +2870,7 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1, } // namespace -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; bool is_swizzle; CanonicalizeShuffle(node, shuffle, &is_swizzle); @@ -2923,18 +2923,18 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { // Code generator uses vtbl, arrange sources to form a valid lookup table. InstructionOperand src0, src1; ArrangeShuffleTable(&g, input0, input1, &src0, &src1); - Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1, + Emit(kArmI8x16Shuffle, g.DefineAsRegister(node), src0, src1, g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 8)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12))); } -void InstructionSelector::VisitS8x16Swizzle(Node* node) { +void InstructionSelector::VisitI8x16Swizzle(Node* node) { ArmOperandGenerator g(this); // We don't want input 0 (the table) to be the same as output, since we will // modify output twice (low and high), and need to keep the table the same. - Emit(kArmS8x16Swizzle, g.DefineAsRegister(node), + Emit(kArmI8x16Swizzle, g.DefineAsRegister(node), g.UseUniqueRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); } diff --git a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc index cee8651276..6524502408 100644 --- a/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/code-generator-arm64.cc @@ -2551,18 +2551,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1).V16B(), i.InputInt4(2)); break; } - case kArm64S8x16Swizzle: { + case kArm64I8x16Swizzle: { __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(), i.InputSimd128Register(1).V16B()); break; } - case kArm64S8x16Shuffle: { + case kArm64I8x16Shuffle: { Simd128Register dst = i.OutputSimd128Register().V16B(), src0 = i.InputSimd128Register(0).V16B(), src1 = i.InputSimd128Register(1).V16B(); // Unary shuffle table is in src0, binary shuffle table is in src0, src1, // which must be consecutive. 
- int64_t mask = 0; + uint32_t mask = 0; if (src0 == src1) { mask = 0x0F0F0F0F; } else { @@ -2601,20 +2601,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Add(i.OutputRegister32(), i.OutputRegister32(), 1); break; } - case kArm64S8x16LoadSplat: { - __ ld1r(i.OutputSimd128Register().V16B(), i.MemoryOperand(0)); - break; - } - case kArm64S16x8LoadSplat: { - __ ld1r(i.OutputSimd128Register().V8H(), i.MemoryOperand(0)); - break; - } - case kArm64S32x4LoadSplat: { - __ ld1r(i.OutputSimd128Register().V4S(), i.MemoryOperand(0)); - break; - } - case kArm64S64x2LoadSplat: { - __ ld1r(i.OutputSimd128Register().V2D(), i.MemoryOperand(0)); + case kArm64LoadSplat: { + VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode)); + __ ld1r(i.OutputSimd128Register().Format(f), i.MemoryOperand(0)); break; } case kArm64I16x8Load8x8S: { @@ -2647,6 +2636,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S()); break; } + case kArm64S128LoadMem32Zero: { + __ Ldr(i.OutputSimd128Register().S(), i.MemoryOperand(0)); + break; + } + case kArm64S128LoadMem64Zero: { + __ Ldr(i.OutputSimd128Register().D(), i.MemoryOperand(0)); + break; + } #define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT) \ case Op: { \ UseScratchRegisterScope scope(tasm()); \ @@ -2657,13 +2654,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Cset(i.OutputRegister32(), ne); \ break; \ } - // for AnyTrue, the format does not matter, umaxv does not support 2D - SIMD_REDUCE_OP_CASE(kArm64V64x2AnyTrue, Umaxv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64V32x4AnyTrue, Umaxv, kFormatS, 4S); + // For AnyTrue, the format does not matter. + SIMD_REDUCE_OP_CASE(kArm64V128AnyTrue, Umaxv, kFormatS, 4S); SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64V16x8AnyTrue, Umaxv, kFormatH, 8H); SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H); - SIMD_REDUCE_OP_CASE(kArm64V8x16AnyTrue, Umaxv, kFormatB, 16B); SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B); } return kSuccess; @@ -2911,7 +2905,12 @@ void CodeGenerator::AssembleConstructFrame() { if (frame_access_state()->has_frame()) { // Link the frame if (call_descriptor->IsJSFunctionCall()) { + STATIC_ASSERT(InterpreterFrameConstants::kFixedFrameSize % 16 == 8); + DCHECK_EQ(required_slots % 2, 1); __ Prologue(); + // Update required_slots count since we have just claimed one extra slot. + STATIC_ASSERT(TurboAssembler::kExtraSlotClaimedByPrologue == 1); + required_slots -= TurboAssembler::kExtraSlotClaimedByPrologue; } else { __ Push<TurboAssembler::kSignLR>(lr, fp); __ Mov(fp, sp); @@ -2929,7 +2928,13 @@ void CodeGenerator::AssembleConstructFrame() { // to allocate the remaining stack slots. if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --"); osr_pc_offset_ = __ pc_offset(); - required_slots -= osr_helper()->UnoptimizedFrameSlots(); + size_t unoptimized_frame_slots = osr_helper()->UnoptimizedFrameSlots(); + DCHECK(call_descriptor->IsJSFunctionCall()); + DCHECK_EQ(unoptimized_frame_slots % 2, 1); + // One unoptimized frame slot has already been claimed when the actual + // arguments count was pushed. + required_slots -= + unoptimized_frame_slots - TurboAssembler::kExtraSlotClaimedByPrologue; ResetSpeculationPoison(); } @@ -2984,13 +2989,7 @@ void CodeGenerator::AssembleConstructFrame() { // recording their argument count. 
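On the consolidation of the four AnyTrue opcodes into a single kArm64V128AnyTrue above: "some lane is nonzero" is the same predicate as "some bit is nonzero", so the lane width used for the reduction is irrelevant and one umaxv-over-4S reduction serves the 8x16, 16x8, 32x4, and 64x2 variants alike. A scalar sketch of that equivalence:

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

// Models a 128-bit vector as four 32-bit lanes.
using V128 = std::array<uint32_t, 4>;

// AnyTrue via an unsigned max reduction (umaxv): nonzero max iff any
// byte anywhere in the vector is nonzero, regardless of lane partition.
bool AnyTrue(const V128& v) {
  uint32_t max = 0;
  for (uint32_t lane : v) max = std::max(max, lane);
  return max != 0;
}

int main() {
  assert(!AnyTrue({0, 0, 0, 0}));
  assert(AnyTrue({0, 0x00010000u, 0, 0}));  // one nonzero byte suffices
}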
switch (call_descriptor->kind()) { case CallDescriptor::kCallJSFunction: - if (call_descriptor->PushArgumentCount()) { - __ Claim(required_slots + 1); // Claim extra slot for argc. - __ Str(kJavaScriptCallArgCountRegister, - MemOperand(fp, OptimizedBuiltinFrameConstants::kArgCOffset)); - } else { - __ Claim(required_slots); - } + __ Claim(required_slots); break; case CallDescriptor::kCallCodeObject: { UseScratchRegisterScope temps(tasm()); diff --git a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h index e74819d9d6..7f84a3504b 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h +++ b/deps/v8/src/compiler/backend/arm64/instruction-codes-arm64.h @@ -367,26 +367,20 @@ namespace compiler { V(Arm64S8x16TransposeLeft) \ V(Arm64S8x16TransposeRight) \ V(Arm64S8x16Concat) \ - V(Arm64S8x16Swizzle) \ - V(Arm64S8x16Shuffle) \ + V(Arm64I8x16Swizzle) \ + V(Arm64I8x16Shuffle) \ V(Arm64S32x2Reverse) \ V(Arm64S16x4Reverse) \ V(Arm64S16x2Reverse) \ V(Arm64S8x8Reverse) \ V(Arm64S8x4Reverse) \ V(Arm64S8x2Reverse) \ - V(Arm64V64x2AnyTrue) \ + V(Arm64V128AnyTrue) \ V(Arm64V64x2AllTrue) \ - V(Arm64V32x4AnyTrue) \ V(Arm64V32x4AllTrue) \ - V(Arm64V16x8AnyTrue) \ V(Arm64V16x8AllTrue) \ - V(Arm64V8x16AnyTrue) \ V(Arm64V8x16AllTrue) \ - V(Arm64S8x16LoadSplat) \ - V(Arm64S16x8LoadSplat) \ - V(Arm64S32x4LoadSplat) \ - V(Arm64S64x2LoadSplat) \ + V(Arm64LoadSplat) \ V(Arm64I16x8Load8x8S) \ V(Arm64I16x8Load8x8U) \ V(Arm64I32x4Load16x4S) \ @@ -428,7 +422,11 @@ namespace compiler { V(Arm64Word64AtomicCompareExchangeUint8) \ V(Arm64Word64AtomicCompareExchangeUint16) \ V(Arm64Word64AtomicCompareExchangeUint32) \ - V(Arm64Word64AtomicCompareExchangeUint64) + V(Arm64Word64AtomicCompareExchangeUint64) \ + V(Arm64S128LoadMem32Zero) \ + V(Arm64S128LoadMem64Zero) +// TODO(v8:10930) Adding new codes before these atomic instructions causes a +// mksnapshot error. // Addressing modes represent the "shape" of inputs to an instruction. // Many instructions support multiple addressing modes. 
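The alignment bookkeeping added to AssembleConstructFrame above can be checked with plain integer arithmetic. A sketch, where kFixedFrameSize and the local-slot count are illustrative stand-ins; the invariants the diff actually asserts are kFixedFrameSize % 16 == 8 and kExtraSlotClaimedByPrologue == 1 (I take the extra slot Prologue claims to be the argc slot formerly pushed separately):

#include <cassert>

int main() {
  const int kSystemPointerSize = 8;  // arm64 slot size
  const int kFixedFrameSize = 24;    // illustrative: 24 % 16 == 8
  const int kLocalSlots = 4;         // arbitrary even number of locals
  int required_slots = kFixedFrameSize / kSystemPointerSize + kLocalSlots;
  assert(required_slots % 2 == 1);   // the new DCHECK_EQ(required_slots % 2, 1)
  // Prologue() already claimed one slot, so the remaining Claim() is for
  // an even slot count and sp stays 16-byte aligned.
  const int kExtraSlotClaimedByPrologue = 1;
  required_slots -= kExtraSlotClaimedByPrologue;
  assert(required_slots % 2 == 0);
}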
Addressing modes diff --git a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc index 5d75c5147e..6c572d2a1c 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc @@ -337,21 +337,18 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64S8x16TransposeLeft: case kArm64S8x16TransposeRight: case kArm64S8x16Concat: - case kArm64S8x16Swizzle: - case kArm64S8x16Shuffle: + case kArm64I8x16Swizzle: + case kArm64I8x16Shuffle: case kArm64S32x2Reverse: case kArm64S16x4Reverse: case kArm64S16x2Reverse: case kArm64S8x8Reverse: case kArm64S8x4Reverse: case kArm64S8x2Reverse: - case kArm64V64x2AnyTrue: + case kArm64V128AnyTrue: case kArm64V64x2AllTrue: - case kArm64V32x4AnyTrue: case kArm64V32x4AllTrue: - case kArm64V16x8AnyTrue: case kArm64V16x8AllTrue: - case kArm64V8x16AnyTrue: case kArm64V8x16AllTrue: case kArm64TestAndBranch32: case kArm64TestAndBranch: @@ -373,16 +370,15 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64LdrDecompressTaggedPointer: case kArm64LdrDecompressAnyTagged: case kArm64Peek: - case kArm64S8x16LoadSplat: - case kArm64S16x8LoadSplat: - case kArm64S32x4LoadSplat: - case kArm64S64x2LoadSplat: + case kArm64LoadSplat: case kArm64I16x8Load8x8S: case kArm64I16x8Load8x8U: case kArm64I32x4Load16x4S: case kArm64I32x4Load16x4U: case kArm64I64x2Load32x2S: case kArm64I64x2Load32x2U: + case kArm64S128LoadMem32Zero: + case kArm64S128LoadMem64Zero: return kIsLoadOperation; case kArm64Claim: diff --git a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc index 5f19a6bb7c..fac7f9c1d1 100644 --- a/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/deps/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -607,19 +607,23 @@ void InstructionSelector::VisitLoadTransform(Node* node) { bool require_add = false; switch (params.transformation) { case LoadTransformation::kS8x16LoadSplat: - opcode = kArm64S8x16LoadSplat; + opcode = kArm64LoadSplat; + opcode |= MiscField::encode(8); require_add = true; break; case LoadTransformation::kS16x8LoadSplat: - opcode = kArm64S16x8LoadSplat; + opcode = kArm64LoadSplat; + opcode |= MiscField::encode(16); require_add = true; break; case LoadTransformation::kS32x4LoadSplat: - opcode = kArm64S32x4LoadSplat; + opcode = kArm64LoadSplat; + opcode |= MiscField::encode(32); require_add = true; break; case LoadTransformation::kS64x2LoadSplat: - opcode = kArm64S64x2LoadSplat; + opcode = kArm64LoadSplat; + opcode |= MiscField::encode(64); require_add = true; break; case LoadTransformation::kI16x8Load8x8S: @@ -640,6 +644,12 @@ void InstructionSelector::VisitLoadTransform(Node* node) { case LoadTransformation::kI64x2Load32x2U: opcode = kArm64I64x2Load32x2U; break; + case LoadTransformation::kS128LoadMem32Zero: + opcode = kArm64S128LoadMem32Zero; + break; + case LoadTransformation::kS128LoadMem64Zero: + opcode = kArm64S128LoadMem64Zero; + break; default: UNIMPLEMENTED(); } @@ -1397,7 +1407,7 @@ void InstructionSelector::VisitWord64Ror(Node* node) { V(Float64Max, kArm64Float64Max) \ V(Float32Min, kArm64Float32Min) \ V(Float64Min, kArm64Float64Min) \ - V(S8x16Swizzle, kArm64S8x16Swizzle) + V(I8x16Swizzle, kArm64I8x16Swizzle) #define RR_VISITOR(Name, opcode) \ void InstructionSelector::Visit##Name(Node* node) { \ @@ -1775,10 +1785,9 @@ void 
InstructionSelector::VisitChangeInt32ToInt64(Node* node) { VisitRR(this, kArm64Sxtw, node); } -void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { - Arm64OperandGenerator g(this); - Node* value = node->InputAt(0); - switch (value->opcode()) { +bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) { + DCHECK_NE(node->opcode(), IrOpcode::kPhi); + switch (node->opcode()) { case IrOpcode::kWord32And: case IrOpcode::kWord32Or: case IrOpcode::kWord32Xor: @@ -1805,26 +1814,31 @@ void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { // 32-bit operations will write their result in a W register (implicitly // clearing the top 32-bit of the corresponding X register) so the // zero-extension is a no-op. - EmitIdentity(node); - return; + return true; } case IrOpcode::kLoad: { // As for the operations above, a 32-bit load will implicitly clear the // top 32 bits of the destination register. - LoadRepresentation load_rep = LoadRepresentationOf(value->op()); + LoadRepresentation load_rep = LoadRepresentationOf(node->op()); switch (load_rep.representation()) { case MachineRepresentation::kWord8: case MachineRepresentation::kWord16: case MachineRepresentation::kWord32: - EmitIdentity(node); - return; + return true; default: - break; + return false; } - break; } default: - break; + return false; + } +} + +void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { + Arm64OperandGenerator g(this); + Node* value = node->InputAt(0); + if (ZeroExtendsWord32ToWord64(value)) { + return EmitIdentity(node); } Emit(kArm64Mov32, g.DefineAsRegister(node), g.UseRegister(value)); } @@ -3225,13 +3239,13 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I8x16Neg, kArm64I8x16Neg) \ V(I8x16Abs, kArm64I8x16Abs) \ V(S128Not, kArm64S128Not) \ - V(V64x2AnyTrue, kArm64V64x2AnyTrue) \ + V(V64x2AnyTrue, kArm64V128AnyTrue) \ V(V64x2AllTrue, kArm64V64x2AllTrue) \ - V(V32x4AnyTrue, kArm64V32x4AnyTrue) \ + V(V32x4AnyTrue, kArm64V128AnyTrue) \ V(V32x4AllTrue, kArm64V32x4AllTrue) \ - V(V16x8AnyTrue, kArm64V16x8AnyTrue) \ + V(V16x8AnyTrue, kArm64V128AnyTrue) \ V(V16x8AllTrue, kArm64V16x8AllTrue) \ - V(V8x16AnyTrue, kArm64V8x16AnyTrue) \ + V(V8x16AnyTrue, kArm64V128AnyTrue) \ V(V8x16AllTrue, kArm64V8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ @@ -3597,7 +3611,7 @@ void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1, } // namespace -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; bool is_swizzle; CanonicalizeShuffle(node, shuffle, &is_swizzle); @@ -3647,7 +3661,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { // Code generator uses vtbl, arrange sources to form a valid lookup table. 
InstructionOperand src0, src1; ArrangeShuffleTable(&g, input0, input1, &src0, &src1); - Emit(kArm64S8x16Shuffle, g.DefineAsRegister(node), src0, src1, + Emit(kArm64I8x16Shuffle, g.DefineAsRegister(node), src0, src1, g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 8)), diff --git a/deps/v8/src/compiler/backend/code-generator-impl.h b/deps/v8/src/compiler/backend/code-generator-impl.h index e335135240..88f82fe930 100644 --- a/deps/v8/src/compiler/backend/code-generator-impl.h +++ b/deps/v8/src/compiler/backend/code-generator-impl.h @@ -259,6 +259,12 @@ class OutOfLineCode : public ZoneObject { inline bool HasCallDescriptorFlag(Instruction* instr, CallDescriptor::Flag flag) { + STATIC_ASSERT(CallDescriptor::kFlagsBitsEncodedInInstructionCode == 10); +#ifdef DEBUG + static constexpr int kInstructionCodeFlagsMask = + ((1 << CallDescriptor::kFlagsBitsEncodedInInstructionCode) - 1); + DCHECK_EQ(static_cast<int>(flag) & kInstructionCodeFlagsMask, flag); +#endif return MiscField::decode(instr->opcode()) & flag; } diff --git a/deps/v8/src/compiler/backend/code-generator.cc b/deps/v8/src/compiler/backend/code-generator.cc index 6e740b18f8..33a80f52d0 100644 --- a/deps/v8/src/compiler/backend/code-generator.cc +++ b/deps/v8/src/compiler/backend/code-generator.cc @@ -613,8 +613,8 @@ void CodeGenerator::GetPushCompatibleMoves(Instruction* instr, // then the full gap resolver must be used since optimization with // pushes don't participate in the parallel move and might clobber // values needed for the gap resolve. - if (source.IsStackSlot() && LocationOperand::cast(source).index() >= - first_push_compatible_index) { + if (source.IsAnyStackSlot() && LocationOperand::cast(source).index() >= + first_push_compatible_index) { pushes->clear(); return; } @@ -973,15 +973,13 @@ Label* CodeGenerator::AddJumpTable(Label** targets, size_t target_count) { } void CodeGenerator::RecordCallPosition(Instruction* instr) { - CallDescriptor::Flags flags(MiscField::decode(instr->opcode())); - - bool needs_frame_state = (flags & CallDescriptor::kNeedsFrameState); - + const bool needs_frame_state = + HasCallDescriptorFlag(instr, CallDescriptor::kNeedsFrameState); RecordSafepoint(instr->reference_map(), needs_frame_state ? Safepoint::kLazyDeopt : Safepoint::kNoLazyDeopt); - if (flags & CallDescriptor::kHasExceptionHandler) { + if (HasCallDescriptorFlag(instr, CallDescriptor::kHasExceptionHandler)) { InstructionOperandConverter i(this, instr); RpoNumber handler_rpo = i.InputRpo(instr->InputCount() - 1); DCHECK(instructions()->InstructionBlockAt(handler_rpo)->IsHandler()); diff --git a/deps/v8/src/compiler/backend/code-generator.h b/deps/v8/src/compiler/backend/code-generator.h index 74ec66f8d8..26d03f129a 100644 --- a/deps/v8/src/compiler/backend/code-generator.h +++ b/deps/v8/src/compiler/backend/code-generator.h @@ -310,11 +310,11 @@ class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler { static bool IsValidPush(InstructionOperand source, PushTypeFlags push_type); - // Generate a list moves from an instruction that are candidates to be turned - // into push instructions on platforms that support them. In general, the list - // of push candidates are moves to a set of contiguous destination + // Generate a list of moves from an instruction that are candidates to be + // turned into push instructions on platforms that support them. 
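The DCHECK added to HasCallDescriptorFlag above guards against querying a flag bit that the encoding never stored: only the low kFlagsBitsEncodedInInstructionCode bits of the CallDescriptor flags survive into the instruction word, so testing any higher bit through it would silently return false. A small model, with the constant mirrored from the diff and HasEncodedFlag an invented stand-in:

#include <cassert>
#include <cstdint>

constexpr int kFlagsBitsEncodedInInstructionCode = 10;
constexpr int kInstructionCodeFlagsMask =
    (1 << kFlagsBitsEncodedInInstructionCode) - 1;

bool HasEncodedFlag(uint32_t encoded_flags, int flag) {
  // The flag being asked about must itself be encodable, or the answer
  // is meaningless; this mirrors the new DCHECK.
  assert((flag & kInstructionCodeFlagsMask) == flag);
  return (encoded_flags & flag) != 0;
}

int main() {
  uint32_t encoded = 0b0000000101;  // hypothetical stored flag bits
  assert(HasEncodedFlag(encoded, 1 << 0));
  assert(!HasEncodedFlag(encoded, 1 << 1));
}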
In general, + // the list of push candidates are moves to a set of contiguous destination // InstructionOperand locations on the stack that don't clobber values that - // are needed for resolve the gap or use values generated by the gap, + // are needed to resolve the gap or use values generated by the gap, // i.e. moves that can be hoisted together before the actual gap and assembled // together. static void GetPushCompatibleMoves(Instruction* instr, diff --git a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc index 2878d6e56b..077324a31f 100644 --- a/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/code-generator-ia32.cc @@ -2214,9 +2214,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ psrld(kScratchDoubleReg, 1); __ andps(dst, kScratchDoubleReg); } else { + // TODO(zhin) Improve codegen for this case. __ pcmpeqd(dst, dst); + __ movups(kScratchDoubleReg, src); __ psrld(dst, 1); - __ andps(dst, src); + __ andps(dst, kScratchDoubleReg); } break; } @@ -2236,9 +2238,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ pslld(kScratchDoubleReg, 31); __ xorps(dst, kScratchDoubleReg); } else { + // TODO(zhin) Improve codegen for this case. __ pcmpeqd(dst, dst); + __ movups(kScratchDoubleReg, src); __ pslld(dst, 31); - __ xorps(dst, src); + __ xorps(dst, kScratchDoubleReg); } break; } @@ -2251,7 +2255,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kSSEF32x4Sqrt: { - __ sqrtps(i.OutputSimd128Register(), i.InputOperand(0)); + // TODO(zhin) Improve codegen for this case. + __ movups(kScratchDoubleReg, i.InputOperand(0)); + __ sqrtps(i.OutputSimd128Register(), kScratchDoubleReg); break; } case kAVXF32x4Sqrt: { @@ -2348,7 +2354,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Operand src1 = i.InputOperand(1); // See comment above for correction of minps. __ movups(kScratchDoubleReg, src1); - __ vminps(kScratchDoubleReg, kScratchDoubleReg, dst); + __ vminps(kScratchDoubleReg, kScratchDoubleReg, src0); __ vminps(dst, src0, src1); __ vorps(dst, dst, kScratchDoubleReg); __ vcmpneqps(kScratchDoubleReg, dst, dst); @@ -2381,11 +2387,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kAVXF32x4Max: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src0 = i.InputSimd128Register(0); Operand src1 = i.InputOperand(1); // See comment above for correction of maxps. __ vmovups(kScratchDoubleReg, src1); - __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, dst); - __ vmaxps(dst, dst, src1); + __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, src0); + __ vmaxps(dst, src0, src1); __ vxorps(dst, dst, kScratchDoubleReg); __ vorps(kScratchDoubleReg, kScratchDoubleReg, dst); __ vsubps(kScratchDoubleReg, kScratchDoubleReg, dst); @@ -3643,8 +3650,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ pxor(dst, kScratchDoubleReg); } else { + // TODO(zhin) Improve codegen for this case. 
__ pcmpeqd(dst, dst); - __ pxor(dst, src); + __ movups(kScratchDoubleReg, src); + __ pxor(dst, kScratchDoubleReg); } break; } @@ -3715,7 +3724,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Andnps(dst, src1); break; } - case kIA32S8x16Swizzle: { + case kIA32I8x16Swizzle: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); XMMRegister mask = i.TempSimd128Register(0); @@ -3728,7 +3737,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Pshufb(dst, mask); break; } - case kIA32S8x16Shuffle: { + case kIA32I8x16Shuffle: { XMMRegister dst = i.OutputSimd128Register(); Operand src0 = i.InputOperand(0); Register tmp = i.TempRegister(0); @@ -4690,9 +4699,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (call_descriptor->PushArgumentCount()) { - __ push(kJavaScriptCallArgCountRegister); - } } else { __ StubPrologue(info()->GetOutputStackFrameType()); if (call_descriptor->IsWasmFunctionCall()) { @@ -4836,10 +4842,11 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { } else { Register pop_reg = g.ToRegister(pop); Register scratch_reg = pop_reg == ecx ? edx : ecx; - __ pop(scratch_reg); + __ PopReturnAddressTo(scratch_reg); __ lea(esp, Operand(esp, pop_reg, times_system_pointer_size, static_cast<int>(pop_size))); - __ jmp(scratch_reg); + __ PushReturnAddressFrom(scratch_reg); + __ Ret(); } } diff --git a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h index 8f9f4fcf1c..eca9dc9227 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/deps/v8/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -355,8 +355,8 @@ namespace compiler { V(SSES128Select) \ V(AVXS128Select) \ V(IA32S128AndNot) \ - V(IA32S8x16Swizzle) \ - V(IA32S8x16Shuffle) \ + V(IA32I8x16Swizzle) \ + V(IA32I8x16Shuffle) \ V(IA32S8x16LoadSplat) \ V(IA32S16x8LoadSplat) \ V(IA32S32x4LoadSplat) \ diff --git a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index 51a9a18e44..24abd58c7f 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -336,8 +336,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kSSES128Select: case kAVXS128Select: case kIA32S128AndNot: - case kIA32S8x16Swizzle: - case kIA32S8x16Shuffle: + case kIA32I8x16Swizzle: + case kIA32I8x16Shuffle: case kIA32S32x4Swizzle: case kIA32S32x4Shuffle: case kIA32S16x8Blend: diff --git a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc index b0556fd4ef..fec4053871 100644 --- a/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/deps/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -2687,7 +2687,7 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, } // namespace -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; bool is_swizzle; CanonicalizeShuffle(node, shuffle, &is_swizzle); @@ -2704,9 +2704,12 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { // AVX and swizzles don't generally need DefineSameAsFirst to avoid a move. 
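On the vminps/vmaxps operand fix above (reading src0 rather than dst, which under AVX need not hold an input at all): the two-order min trick only works when both minps operands are the genuine inputs. A scalar sketch of one f32 lane, assuming x86's asymmetric minps semantics; the NaN-quieting tail of the real sequence is omitted:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// x86 `minps a, b` per lane: returns b whenever a < b is false,
// including the NaN and +/-0 cases. Hence min(a,b) != min(b,a) there.
float X86MinPS(float a, float b) { return a < b ? a : b; }

uint32_t Bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
float FromBits(uint32_t u) { float f; std::memcpy(&f, &u, sizeof(f)); return f; }

// The pattern in the diff: minps in both operand orders, then OR the bit
// patterns. ORing propagates a NaN from either order and keeps the sign
// bit of -0.0, which is what wasm f32x4.min demands.
float WasmMinSketch(float a, float b) {
  float m1 = X86MinPS(a, b);
  float m2 = X86MinPS(b, a);
  return FromBits(Bits(m1) | Bits(m2));
}

int main() {
  assert(std::isnan(WasmMinSketch(NAN, 1.0f)));      // NaN propagates
  assert(std::signbit(WasmMinSketch(-0.0f, 0.0f)));  // -0 beats +0
  assert(WasmMinSketch(1.0f, 2.0f) == 1.0f);
}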
bool no_same_as_first = use_avx || is_swizzle; // We generally need UseRegister for input0, Use for input1. + // TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet, but + // we retain this logic (continue setting these in the various shuffle match + // clauses), but ignore it when selecting registers or slots. bool src0_needs_reg = true; bool src1_needs_reg = false; - ArchOpcode opcode = kIA32S8x16Shuffle; // general shuffle is the default + ArchOpcode opcode = kIA32I8x16Shuffle; // general shuffle is the default uint8_t offset; uint8_t shuffle32x4[4]; @@ -2794,7 +2797,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { src0_needs_reg = true; imms[imm_count++] = index; } - if (opcode == kIA32S8x16Shuffle) { + if (opcode == kIA32I8x16Shuffle) { // Use same-as-first for general swizzle, but not shuffle. no_same_as_first = !is_swizzle; src0_needs_reg = !no_same_as_first; @@ -2810,16 +2813,18 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { Node* input0 = node->InputAt(0); InstructionOperand dst = no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node); - InstructionOperand src0 = - src0_needs_reg ? g.UseRegister(input0) : g.Use(input0); + // TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD. + InstructionOperand src0 = g.UseRegister(input0); + USE(src0_needs_reg); int input_count = 0; InstructionOperand inputs[2 + kMaxImms + kMaxTemps]; inputs[input_count++] = src0; if (!is_swizzle) { Node* input1 = node->InputAt(1); - inputs[input_count++] = - src1_needs_reg ? g.UseRegister(input1) : g.Use(input1); + // TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD. + inputs[input_count++] = g.UseRegister(input1); + USE(src1_needs_reg); } for (int i = 0; i < imm_count; ++i) { inputs[input_count++] = g.UseImmediate(imms[i]); @@ -2827,10 +2832,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps); } -void InstructionSelector::VisitS8x16Swizzle(Node* node) { +void InstructionSelector::VisitI8x16Swizzle(Node* node) { IA32OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register()}; - Emit(kIA32S8x16Swizzle, g.DefineSameAsFirst(node), + Emit(kIA32I8x16Swizzle, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } diff --git a/deps/v8/src/compiler/backend/instruction-codes.h b/deps/v8/src/compiler/backend/instruction-codes.h index 84d5d249b8..8772a78df0 100644 --- a/deps/v8/src/compiler/backend/instruction-codes.h +++ b/deps/v8/src/compiler/backend/instruction-codes.h @@ -270,6 +270,8 @@ using InstructionCode = uint32_t; // continuation into a single InstructionCode which is stored as part of // the instruction. 
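For the kIA32I8x16Swizzle case above: PSHUFB zeroes any lane whose selector byte has its top bit set, and the masked temp appears to exist to push out-of-range wasm indices into that range. I assume the usual saturating add of 0x70 here; the constant itself is elided from the hunk. A scalar sketch:

#include <array>
#include <cassert>
#include <cstdint>

using Bytes16 = std::array<uint8_t, 16>;

// Scalar model of x86 PSHUFB: each selector byte picks a source byte by
// its low 4 bits, unless its top bit is set, which yields zero.
Bytes16 Pshufb(const Bytes16& src, const Bytes16& sel) {
  Bytes16 out{};
  for (int i = 0; i < 16; ++i)
    out[i] = (sel[i] & 0x80) ? 0 : src[sel[i] & 0x0F];
  return out;
}

// Wasm i8x16.swizzle wants indices >= 16 to produce 0, but PSHUFB alone
// wraps them mod 16. Saturating-add 0x70 (paddusb): indices 0..15 stay
// below 0x80 with their low nibble intact, indices >= 16 saturate into
// 0x80..0xFF and are zeroed by PSHUFB.
Bytes16 SwizzleSketch(const Bytes16& src, Bytes16 idx) {
  for (auto& b : idx) b = (b > 0x8F) ? 0xFF : b + 0x70;
  return Pshufb(src, idx);
}

int main() {
  Bytes16 src{};
  for (int i = 0; i < 16; ++i) src[i] = 100 + i;
  Bytes16 idx{0, 15, 16, 255};  // remaining selectors are zero
  Bytes16 out = SwizzleSketch(src, idx);
  assert(out[0] == 100 && out[1] == 115);
  assert(out[2] == 0 && out[3] == 0);  // out-of-range lanes become 0
}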
using ArchOpcodeField = base::BitField<ArchOpcode, 0, 9>; +static_assert(ArchOpcodeField::is_valid(kLastArchOpcode), + "All opcodes must fit in the 9-bit ArchOpcodeField."); using AddressingModeField = base::BitField<AddressingMode, 9, 5>; using FlagsModeField = base::BitField<FlagsMode, 14, 3>; using FlagsConditionField = base::BitField<FlagsCondition, 17, 5>; diff --git a/deps/v8/src/compiler/backend/instruction-selector.cc b/deps/v8/src/compiler/backend/instruction-selector.cc index d1594f9305..1c14832bbf 100644 --- a/deps/v8/src/compiler/backend/instruction-selector.cc +++ b/deps/v8/src/compiler/backend/instruction-selector.cc @@ -62,7 +62,12 @@ InstructionSelector::InstructionSelector( trace_turbo_(trace_turbo), tick_counter_(tick_counter), max_unoptimized_frame_height_(max_unoptimized_frame_height), - max_pushed_argument_count_(max_pushed_argument_count) { + max_pushed_argument_count_(max_pushed_argument_count) +#if V8_TARGET_ARCH_64_BIT + , + phi_states_(node_count, Upper32BitsState::kNotYetChecked, zone) +#endif +{ DCHECK_EQ(*max_unoptimized_frame_height, 0); // Caller-initialized. instructions_.reserve(node_count); @@ -2214,10 +2219,10 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitS128Select(node); case IrOpcode::kS128AndNot: return MarkAsSimd128(node), VisitS128AndNot(node); - case IrOpcode::kS8x16Swizzle: - return MarkAsSimd128(node), VisitS8x16Swizzle(node); - case IrOpcode::kS8x16Shuffle: - return MarkAsSimd128(node), VisitS8x16Shuffle(node); + case IrOpcode::kI8x16Swizzle: + return MarkAsSimd128(node), VisitI8x16Swizzle(node); + case IrOpcode::kI8x16Shuffle: + return MarkAsSimd128(node), VisitI8x16Shuffle(node); case IrOpcode::kV64x2AnyTrue: return MarkAsWord32(node), VisitV64x2AnyTrue(node); case IrOpcode::kV64x2AllTrue: @@ -2681,39 +2686,6 @@ void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X -#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \ - !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_MIPS && \ - !V8_TARGET_ARCH_MIPS64 -// TODO(v8:10308) Bitmask operations are in prototype now, we can remove these -// guards when they go into the proposal. -void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); } -// TODO(v8:10501) Prototyping pmin and pmax instructions. -void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); } -#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 - // && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && - // !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 - -#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X && \ - !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \ - !V8_TARGET_ARCH_MIPS64 -// TODO(v8:10553) Prototyping floating point rounding instructions. 
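The new static_assert on ArchOpcodeField pins down the instruction-word layout quoted above: 9 opcode bits, then 5 addressing-mode bits, 3 flags-mode bits, and 5 flags-condition bits. A minimal stand-in for base::BitField showing what is_valid/encode/decode guarantee (MyBitField is illustrative, not the V8 class):

#include <cassert>
#include <cstdint>

template <typename T, int kShift, int kSize>
struct MyBitField {
  static constexpr uint32_t kMax = (1u << kSize) - 1;
  static constexpr bool is_valid(T value) {
    return (static_cast<uint32_t>(value) & ~kMax) == 0;
  }
  static constexpr uint32_t encode(T value) {
    return static_cast<uint32_t>(value) << kShift;
  }
  static constexpr T decode(uint32_t word) {
    return static_cast<T>((word >> kShift) & kMax);
  }
};

using ArchOpcodeField = MyBitField<uint32_t, 0, 9>;  // 512 opcodes max
using AddressingModeField = MyBitField<uint32_t, 9, 5>;

int main() {
  // The static_assert guards exactly this: every ArchOpcode must fit in
  // 9 bits, or encode() would corrupt the neighboring fields.
  uint32_t code =
      ArchOpcodeField::encode(300) | AddressingModeField::encode(3);
  assert(ArchOpcodeField::decode(code) == 300);
  assert(AddressingModeField::decode(code) == 3);
  assert(ArchOpcodeField::is_valid(511) && !ArchOpcodeField::is_valid(512));
}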
-void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); } -#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X - // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && - // !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 - #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 && \ !V8_TARGET_ARCH_ARM // TODO(v8:10583) Prototype i32x4.dot_i16x8_s @@ -2737,10 +2709,20 @@ void InstructionSelector::VisitParameter(Node* node) { } namespace { + LinkageLocation ExceptionLocation() { return LinkageLocation::ForRegister(kReturnRegister0.code(), MachineType::IntPtr()); } + +constexpr InstructionCode EncodeCallDescriptorFlags( + InstructionCode opcode, CallDescriptor::Flags flags) { + // Note: Not all bits of `flags` are preserved. + STATIC_ASSERT(CallDescriptor::kFlagsBitsEncodedInInstructionCode == + MiscField::kSize); + return opcode | MiscField::encode(flags & MiscField::kMax); +} + } // namespace void InstructionSelector::VisitIfException(Node* node) { @@ -2863,6 +2845,7 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) { #if ABI_USES_FUNCTION_DESCRIPTORS // Highest misc_field bit is used on AIX to indicate if a CFunction call // has function descriptor or not. + STATIC_ASSERT(MiscField::kSize == kHasFunctionDescriptorBitShift + 1); if (!call_descriptor->NoFunctionDescriptor()) { misc_field |= 1 << kHasFunctionDescriptorBitShift; } @@ -2871,18 +2854,18 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) { break; } case CallDescriptor::kCallCodeObject: - opcode = kArchCallCodeObject | MiscField::encode(flags); + opcode = EncodeCallDescriptorFlags(kArchCallCodeObject, flags); break; case CallDescriptor::kCallJSFunction: - opcode = kArchCallJSFunction | MiscField::encode(flags); + opcode = EncodeCallDescriptorFlags(kArchCallJSFunction, flags); break; case CallDescriptor::kCallWasmCapiFunction: case CallDescriptor::kCallWasmFunction: case CallDescriptor::kCallWasmImportWrapper: - opcode = kArchCallWasmFunction | MiscField::encode(flags); + opcode = EncodeCallDescriptorFlags(kArchCallWasmFunction, flags); break; case CallDescriptor::kCallBuiltinPointer: - opcode = kArchCallBuiltinPointer | MiscField::encode(flags); + opcode = EncodeCallDescriptorFlags(kArchCallBuiltinPointer, flags); break; } @@ -2912,9 +2895,9 @@ void InstructionSelector::VisitTailCall(Node* node) { auto call_descriptor = CallDescriptorOf(node->op()); CallDescriptor* caller = linkage()->GetIncomingDescriptor(); - DCHECK(caller->CanTailCall(CallDescriptorOf(node->op()))); const CallDescriptor* callee = CallDescriptorOf(node->op()); - int stack_param_delta = callee->GetStackParameterDelta(caller); + DCHECK(caller->CanTailCall(callee)); + const int stack_param_delta = callee->GetStackParameterDelta(caller); CallBuffer buffer(zone(), call_descriptor, nullptr); // Compute InstructionOperands for inputs and outputs. 
@@ -2931,7 +2914,7 @@ void InstructionSelector::VisitTailCall(Node* node) { // Select the appropriate opcode based on the call type. InstructionCode opcode; InstructionOperandVector temps(zone()); - if (linkage()->GetIncomingDescriptor()->IsJSFunctionCall()) { + if (caller->IsJSFunctionCall()) { switch (call_descriptor->kind()) { case CallDescriptor::kCallCodeObject: opcode = kArchTailCallCodeObjectFromJSFunction; @@ -2960,7 +2943,7 @@ void InstructionSelector::VisitTailCall(Node* node) { return; } } - opcode |= MiscField::encode(call_descriptor->flags()); + opcode = EncodeCallDescriptorFlags(opcode, call_descriptor->flags()); Emit(kArchPrepareTailCall, g.NoOutput()); @@ -2969,7 +2952,7 @@ void InstructionSelector::VisitTailCall(Node* node) { // instruction. This is used by backends that need to pad arguments for stack // alignment, in order to store an optional slot of padding above the // arguments. - int optional_padding_slot = callee->GetFirstUnusedStackSlot(); + const int optional_padding_slot = callee->GetFirstUnusedStackSlot(); buffer.instruction_args.push_back(g.TempImmediate(optional_padding_slot)); const int first_unused_stack_slot = @@ -3131,6 +3114,54 @@ bool InstructionSelector::CanProduceSignalingNaN(Node* node) { return true; } +#if V8_TARGET_ARCH_64_BIT +bool InstructionSelector::ZeroExtendsWord32ToWord64(Node* node, + int recursion_depth) { + // To compute whether a Node sets its upper 32 bits to zero, there are three + // cases. + // 1. Phi node, with a computed result already available in phi_states_: + // Read the value from phi_states_. + // 2. Phi node, with no result available in phi_states_ yet: + // Recursively check its inputs, and store the result in phi_states_. + // 3. Anything else: + // Call the architecture-specific ZeroExtendsWord32ToWord64NoPhis. + + // Limit recursion depth to avoid the possibility of stack overflow on very + // large functions. + const int kMaxRecursionDepth = 100; + + if (node->opcode() == IrOpcode::kPhi) { + Upper32BitsState current = phi_states_[node->id()]; + if (current != Upper32BitsState::kNotYetChecked) { + return current == Upper32BitsState::kUpperBitsGuaranteedZero; + } + + // If further recursion is prevented, we can't make any assumptions about + // the output of this phi node. + if (recursion_depth >= kMaxRecursionDepth) { + return false; + } + + // Mark the current node so that we skip it if we recursively visit it + // again. Or, said differently, we compute a largest fixed-point so we can + // be optimistic when we hit cycles. 
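The optimistic fixed-point in ZeroExtendsWord32ToWord64 above is easiest to see on a loop phi that feeds itself: marking the phi "guaranteed zero" before visiting its inputs means the cycle edge back to the phi succeeds immediately, so the verdict depends only on its other inputs. A self-contained model (the node and state types are invented for the sketch):

#include <cassert>
#include <vector>

enum State { kNotYetChecked, kGuaranteedZero, kNoGuarantee };

struct Node {
  bool is_phi = false;
  bool leaf_zero_extends = false;  // known answer for non-phi nodes
  std::vector<int> inputs;         // phi inputs, by node id
};

bool ZeroExtends(std::vector<Node>& g, std::vector<State>& st, int id) {
  Node& n = g[id];
  if (!n.is_phi) return n.leaf_zero_extends;
  if (st[id] != kNotYetChecked) return st[id] == kGuaranteedZero;
  st[id] = kGuaranteedZero;  // optimistic: assume true inside the cycle
  for (int in : n.inputs) {
    if (!ZeroExtends(g, st, in)) {
      st[id] = kNoGuarantee;
      return false;
    }
  }
  return true;
}

int main() {
  // Node 0: a 32-bit load (zero-extends). Node 1: loop phi(node 0, itself).
  std::vector<Node> g(2);
  g[0].leaf_zero_extends = true;
  g[1].is_phi = true;
  g[1].inputs = {0, 1};  // includes itself: the cycle case
  std::vector<State> st(2, kNotYetChecked);
  assert(ZeroExtends(g, st, 1));  // cycle resolved optimistically
}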
+ phi_states_[node->id()] = Upper32BitsState::kUpperBitsGuaranteedZero; + + int input_count = node->op()->ValueInputCount(); + for (int i = 0; i < input_count; ++i) { + Node* input = NodeProperties::GetValueInput(node, i); + if (!ZeroExtendsWord32ToWord64(input, recursion_depth + 1)) { + phi_states_[node->id()] = Upper32BitsState::kNoGuarantee; + return false; + } + } + + return true; + } + return ZeroExtendsWord32ToWord64NoPhis(node); +} +#endif // V8_TARGET_ARCH_64_BIT + namespace { FrameStateDescriptor* GetFrameStateDescriptorInternal(Zone* zone, Node* state) { diff --git a/deps/v8/src/compiler/backend/instruction-selector.h b/deps/v8/src/compiler/backend/instruction-selector.h index 0aa5dbbeaf..6452e3ec4c 100644 --- a/deps/v8/src/compiler/backend/instruction-selector.h +++ b/deps/v8/src/compiler/backend/instruction-selector.h @@ -667,6 +667,17 @@ class V8_EXPORT_PRIVATE InstructionSelector final { void VisitWord64AtomicNarrowBinop(Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op); +#if V8_TARGET_ARCH_64_BIT + bool ZeroExtendsWord32ToWord64(Node* node, int recursion_depth = 0); + bool ZeroExtendsWord32ToWord64NoPhis(Node* node); + + enum Upper32BitsState : uint8_t { + kNotYetChecked, + kUpperBitsGuaranteedZero, + kNoGuarantee, + }; +#endif // V8_TARGET_ARCH_64_BIT + // =========================================================================== Zone* const zone_; @@ -702,6 +713,13 @@ class V8_EXPORT_PRIVATE InstructionSelector final { // arguments (for calls). Later used to apply an offset to stack checks. size_t* max_unoptimized_frame_height_; size_t* max_pushed_argument_count_; + +#if V8_TARGET_ARCH_64_BIT + // Holds lazily-computed results for whether phi nodes guarantee their upper + // 32 bits to be zero. Indexed by node ID; nobody reads or writes the values + // for non-phi nodes. + ZoneVector<Upper32BitsState> phi_states_; +#endif }; } // namespace compiler diff --git a/deps/v8/src/compiler/backend/live-range-separator.cc b/deps/v8/src/compiler/backend/live-range-separator.cc deleted file mode 100644 index acfe23dd06..0000000000 --- a/deps/v8/src/compiler/backend/live-range-separator.cc +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2015 the V8 project authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "src/compiler/backend/live-range-separator.h" -#include "src/compiler/backend/register-allocator.h" - -namespace v8 { -namespace internal { -namespace compiler { - -#define TRACE_COND(cond, ...) \ - do { \ - if (cond) PrintF(__VA_ARGS__); \ - } while (false) - -namespace { - -void CreateSplinter(TopLevelLiveRange* range, - TopTierRegisterAllocationData* data, - LifetimePosition first_cut, LifetimePosition last_cut, - bool trace_alloc) { - DCHECK(!range->IsSplinter()); - // We can ignore ranges that live solely in deferred blocks. - // If a range ends right at the end of a deferred block, it is marked by - // the range builder as ending at gap start of the next block - since the - // end is a position where the variable isn't live. We need to take that - // into consideration. - LifetimePosition max_allowed_end = last_cut.NextFullStart(); - - if (first_cut <= range->Start() && max_allowed_end >= range->End()) { - return; - } - - LifetimePosition start = Max(first_cut, range->Start()); - LifetimePosition end = Min(last_cut, range->End()); - - if (start < end) { - // Ensure the original range has a spill range associated, before it gets - // splintered. 
Splinters will point to it. This way, when attempting to - // reuse spill slots of splinters, during allocation, we avoid clobbering - // such slots. - if (range->MayRequireSpillRange()) { - data->CreateSpillRangeForLiveRange(range); - } - if (range->splinter() == nullptr) { - TopLevelLiveRange* splinter = - data->NextLiveRange(range->representation()); - DCHECK_NULL(data->live_ranges()[splinter->vreg()]); - data->live_ranges()[splinter->vreg()] = splinter; - range->SetSplinter(splinter); - } - Zone* zone = data->allocation_zone(); - TRACE_COND(trace_alloc, - "creating splinter %d for range %d between %d and %d\n", - range->splinter()->vreg(), range->vreg(), - start.ToInstructionIndex(), end.ToInstructionIndex()); - range->Splinter(start, end, zone); - } -} - -void SetSlotUse(TopLevelLiveRange* range) { - range->reset_slot_use(); - for (const UsePosition* pos = range->first_pos(); - !range->has_slot_use() && pos != nullptr; pos = pos->next()) { - if (pos->type() == UsePositionType::kRequiresSlot) { - range->register_slot_use(TopLevelLiveRange::SlotUseKind::kGeneralSlotUse); - } - } -} - -void SplinterLiveRange(TopLevelLiveRange* range, - TopTierRegisterAllocationData* data) { - const InstructionSequence* code = data->code(); - UseInterval* interval = range->first_interval(); - - LifetimePosition first_cut = LifetimePosition::Invalid(); - LifetimePosition last_cut = LifetimePosition::Invalid(); - - while (interval != nullptr) { - // We have to cache these here, as splintering might destroy the original - // interval below. - UseInterval* next_interval = interval->next(); - LifetimePosition interval_end = interval->end(); - const InstructionBlock* first_block = - code->GetInstructionBlock(interval->FirstGapIndex()); - const InstructionBlock* last_block = - code->GetInstructionBlock(interval->LastGapIndex()); - int first_block_nr = first_block->rpo_number().ToInt(); - int last_block_nr = last_block->rpo_number().ToInt(); - for (int block_id = first_block_nr; block_id <= last_block_nr; ++block_id) { - const InstructionBlock* current_block = - code->InstructionBlockAt(RpoNumber::FromInt(block_id)); - if (current_block->IsDeferred()) { - if (!first_cut.IsValid()) { - first_cut = LifetimePosition::GapFromInstructionIndex( - current_block->first_instruction_index()); - } - // We splinter until the last gap in the block. I assume this is done to - // leave a little range to be allocated by normal register allocation - // and then use that range to connect when splinters are merged back. - // This might be done as control flow resolution does not insert moves - // if two consecutive blocks in rpo order are also consecutive in - // control flow. - last_cut = LifetimePosition::GapFromInstructionIndex( - current_block->last_instruction_index()); - } else { - if (first_cut.IsValid()) { - CreateSplinter(range, data, first_cut, last_cut, - data->is_trace_alloc()); - first_cut = LifetimePosition::Invalid(); - last_cut = LifetimePosition::Invalid(); - } - } - } - // If we reach the end of an interval with a first_cut and last_cut set, it - // means that we can splinter to the end of the interval, as the value dies - // in this control flow branch or is not live in the next block. In the - // former case, we won't need to reload the value, so we can splinter to the - // end of its lifetime. In the latter case, control flow resolution will - // have to connect blocks anyway, so we can also splinter to the end of the - // block, too. 
- if (first_cut.IsValid()) { - CreateSplinter(range, data, first_cut, interval_end, - data->is_trace_alloc()); - first_cut = LifetimePosition::Invalid(); - last_cut = LifetimePosition::Invalid(); - } - interval = next_interval; - } - - // Redo has_slot_use - if (range->has_slot_use() && range->splinter() != nullptr) { - SetSlotUse(range); - SetSlotUse(range->splinter()); - } -} - -} // namespace - -void LiveRangeSeparator::Splinter() { - size_t virt_reg_count = data()->live_ranges().size(); - for (size_t vreg = 0; vreg < virt_reg_count; ++vreg) { - TopLevelLiveRange* range = data()->live_ranges()[vreg]; - if (range == nullptr || range->IsEmpty() || range->IsSplinter()) { - continue; - } - int first_instr = range->first_interval()->FirstGapIndex(); - if (!data()->code()->GetInstructionBlock(first_instr)->IsDeferred()) { - SplinterLiveRange(range, data()); - } - } -} - -void LiveRangeMerger::MarkRangesSpilledInDeferredBlocks() { - const InstructionSequence* code = data()->code(); - for (TopLevelLiveRange* top : data()->live_ranges()) { - if (top == nullptr || top->IsEmpty() || top->splinter() == nullptr || - top->HasSpillOperand() || !top->splinter()->HasSpillRange()) { - continue; - } - - LiveRange* child = top; - for (; child != nullptr; child = child->next()) { - if (child->spilled() || - child->NextSlotPosition(child->Start()) != nullptr) { - break; - } - } - if (child == nullptr) { - DCHECK(!data()->is_turbo_control_flow_aware_allocation()); - top->TreatAsSpilledInDeferredBlock(data()->allocation_zone(), - code->InstructionBlockCount()); - } - } -} - -void LiveRangeMerger::Merge() { - MarkRangesSpilledInDeferredBlocks(); - - int live_range_count = static_cast<int>(data()->live_ranges().size()); - for (int i = 0; i < live_range_count; ++i) { - TopLevelLiveRange* range = data()->live_ranges()[i]; - if (range == nullptr || range->IsEmpty() || !range->IsSplinter()) { - continue; - } - TopLevelLiveRange* splinter_parent = range->splintered_from(); - - int to_remove = range->vreg(); - splinter_parent->Merge(range, data()->allocation_zone()); - data()->live_ranges()[to_remove] = nullptr; - } -} - -#undef TRACE_COND - -} // namespace compiler -} // namespace internal -} // namespace v8 diff --git a/deps/v8/src/compiler/backend/live-range-separator.h b/deps/v8/src/compiler/backend/live-range-separator.h deleted file mode 100644 index f84b275e08..0000000000 --- a/deps/v8/src/compiler/backend/live-range-separator.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2015 the V8 project authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef V8_COMPILER_BACKEND_LIVE_RANGE_SEPARATOR_H_ -#define V8_COMPILER_BACKEND_LIVE_RANGE_SEPARATOR_H_ - -#include "src/zone/zone.h" -namespace v8 { -namespace internal { - -class Zone; - -namespace compiler { - -class TopTierRegisterAllocationData; - -// A register allocation pair of transformations: splinter and merge live ranges -class LiveRangeSeparator final : public ZoneObject { - public: - LiveRangeSeparator(TopTierRegisterAllocationData* data, Zone* zone) - : data_(data), zone_(zone) {} - - void Splinter(); - - private: - TopTierRegisterAllocationData* data() const { return data_; } - Zone* zone() const { return zone_; } - - TopTierRegisterAllocationData* const data_; - Zone* const zone_; - - DISALLOW_COPY_AND_ASSIGN(LiveRangeSeparator); -}; - -class LiveRangeMerger final : public ZoneObject { - public: - LiveRangeMerger(TopTierRegisterAllocationData* data, Zone* zone) - : data_(data), zone_(zone) {} - - void Merge(); - - private: - TopTierRegisterAllocationData* data() const { return data_; } - Zone* zone() const { return zone_; } - - // Mark ranges spilled in deferred blocks, that also cover non-deferred code. - // We do nothing special for ranges fully contained in deferred blocks, - // because they would "spill in deferred blocks" anyway. - void MarkRangesSpilledInDeferredBlocks(); - - TopTierRegisterAllocationData* const data_; - Zone* const zone_; - - DISALLOW_COPY_AND_ASSIGN(LiveRangeMerger); -}; - -} // namespace compiler -} // namespace internal -} // namespace v8 -#endif // V8_COMPILER_BACKEND_LIVE_RANGE_SEPARATOR_H_ diff --git a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc index 07416ab8ba..5457883fee 100644 --- a/deps/v8/src/compiler/backend/mips/code-generator-mips.cc +++ b/deps/v8/src/compiler/backend/mips/code-generator-mips.cc @@ -1239,7 +1239,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kMipsAbsS: - __ abs_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); + if (IsMipsArchVariant(kMips32r6)) { + __ abs_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); + } else { + __ mfc1(kScratchReg, i.InputSingleRegister(0)); + __ Ins(kScratchReg, zero_reg, 31, 1); + __ mtc1(kScratchReg, i.OutputSingleRegister()); + } break; case kMipsSqrtS: { __ sqrt_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); @@ -1330,9 +1336,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ MovFromFloatResult(i.OutputDoubleRegister()); break; } - case kMipsAbsD: - __ abs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); + case kMipsAbsD: { + FPURegister src = i.InputDoubleRegister(0); + FPURegister dst = i.OutputDoubleRegister(); + if (IsMipsArchVariant(kMips32r6)) { + __ abs_d(dst, src); + } else { + __ Move(dst, src); + __ mfhc1(kScratchReg, src); + __ Ins(kScratchReg, zero_reg, 31, 1); + __ mthc1(kScratchReg, dst); + } break; + } case kMipsNegS: __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); break; @@ -3274,7 +3290,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ sldi_b(dst, i.InputSimd128Register(1), i.InputInt4(2)); break; } - case kMipsS8x16Shuffle: { + case kMipsI8x16Shuffle: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), @@ -3299,7 +3315,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshf_b(dst, src1, src0); break; } - case kMipsS8x16Swizzle: { + case kMipsI8x16Swizzle: { 
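On the kMipsAbsS/kMipsAbsD fallback above for pre-r6 variants: instead of abs.s/abs.d, the value is moved to a GPR and Ins writes a zero into the sign bit, presumably to sidestep the arithmetic-abs NaN behavior of older cores (an assumption; the diff doesn't say). The same bit manipulation in scalar form:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// fabs as pure bit manipulation: clear bit 31, leave everything else
// (including NaN payloads) untouched.
float BitwiseAbs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));  // mfc1
  bits &= 0x7FFFFFFFu;                   // Ins kScratchReg, zero_reg, 31, 1
  std::memcpy(&x, &bits, sizeof(bits));  // mtc1
  return x;
}

int main() {
  assert(BitwiseAbs(-1.5f) == 1.5f);
  assert(BitwiseAbs(1.5f) == 1.5f);
  assert(!std::signbit(BitwiseAbs(-0.0f)));
}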
Simd128Register dst = i.OutputSimd128Register(), tbl = i.InputSimd128Register(0), ctl = i.InputSimd128Register(1); @@ -3905,9 +3921,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (call_descriptor->PushArgumentCount()) { - __ Push(kJavaScriptCallArgCountRegister); - } } else { __ StubPrologue(info()->GetOutputStackFrameType()); if (call_descriptor->IsWasmFunctionCall()) { diff --git a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h index b95bd82d28..46ce3d359a 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h +++ b/deps/v8/src/compiler/backend/mips/instruction-codes-mips.h @@ -307,8 +307,8 @@ namespace compiler { V(MipsS8x16PackOdd) \ V(MipsS8x16InterleaveEven) \ V(MipsS8x16InterleaveOdd) \ - V(MipsS8x16Shuffle) \ - V(MipsS8x16Swizzle) \ + V(MipsI8x16Shuffle) \ + V(MipsI8x16Swizzle) \ V(MipsS8x16Concat) \ V(MipsS8x8Reverse) \ V(MipsS8x4Reverse) \ diff --git a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc index 507bb14664..64e78b8122 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc +++ b/deps/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc @@ -273,8 +273,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsS8x16InterleaveRight: case kMipsS8x16PackEven: case kMipsS8x16PackOdd: - case kMipsS8x16Shuffle: - case kMipsS8x16Swizzle: + case kMipsI8x16Shuffle: + case kMipsI8x16Swizzle: case kMipsS8x2Reverse: case kMipsS8x4Reverse: case kMipsS8x8Reverse: diff --git a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc index 6aabbf3761..b552b0dec1 100644 --- a/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc +++ b/deps/v8/src/compiler/backend/mips/instruction-selector-mips.cc @@ -1388,9 +1388,9 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { ArchOpcode opcode = kArchNop; switch (load_rep.representation()) { - case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kWord8: - UNREACHABLE(); + opcode = load_rep.IsUnsigned() ? kMipsLbu : kMipsLb; + break; case MachineRepresentation::kWord16: opcode = load_rep.IsUnsigned() ? kMipsUlhu : kMipsUlh; break; @@ -1409,6 +1409,7 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { case MachineRepresentation::kSimd128: opcode = kMipsMsaLd; break; + case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kCompressedPointer: // Fall through. case MachineRepresentation::kCompressed: // Fall through. case MachineRepresentation::kWord64: // Fall through. @@ -1446,9 +1447,9 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { case MachineRepresentation::kFloat64: opcode = kMipsUsdc1; break; - case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kWord8: - UNREACHABLE(); + opcode = kMipsSb; + break; case MachineRepresentation::kWord16: opcode = kMipsUsh; break; @@ -1461,6 +1462,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { case MachineRepresentation::kSimd128: opcode = kMipsMsaSt; break; + case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kCompressedPointer: // Fall through. case MachineRepresentation::kCompressed: // Fall through. case MachineRepresentation::kWord64: // Fall through. 
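The unaligned-access hunks above stop treating kWord8 as UNREACHABLE: alignment is measured in units of the access size, and a one-byte access is aligned at every address, so plain lb/lbu/sb suffice even on the "unaligned" path. Trivially checkable:

#include <cassert>
#include <cstddef>
#include <cstdint>

bool IsAligned(uintptr_t addr, size_t access_size) {
  return addr % access_size == 0;
}

int main() {
  for (uintptr_t a = 0; a < 8; ++a) assert(IsAligned(a, 1));
  assert(!IsAligned(3, 2));  // wider accesses can genuinely be misaligned
}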
@@ -2378,7 +2380,7 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, } // namespace -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; bool is_swizzle; CanonicalizeShuffle(node, shuffle, &is_swizzle); @@ -2404,7 +2406,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4))); return; } - Emit(kMipsS8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), + Emit(kMipsI8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), g.UseRegister(input1), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)), @@ -2412,15 +2414,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12))); } -void InstructionSelector::VisitS8x16Swizzle(Node* node) { +void InstructionSelector::VisitI8x16Swizzle(Node* node) { MipsOperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register()}; // We don't want input 0 or input 1 to be the same as output, since we will // modify output before doing the calculation. - Emit(kMipsS8x16Swizzle, g.DefineAsRegister(node), + Emit(kMipsI8x16Swizzle, g.DefineAsRegister(node), g.UseUniqueRegister(node->InputAt(0)), - g.UseUniqueRegister(node->InputAt(1)), - arraysize(temps), temps); + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { diff --git a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc index 2fda592ae1..bb01eab924 100644 --- a/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/code-generator-mips64.cc @@ -1318,7 +1318,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kMips64AbsS: - __ abs_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); + if (kArchVariant == kMips64r6) { + __ abs_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); + } else { + __ mfc1(kScratchReg, i.InputSingleRegister(0)); + __ Dins(kScratchReg, zero_reg, 31, 1); + __ mtc1(kScratchReg, i.OutputSingleRegister()); + } break; case kMips64NegS: __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0)); break; @@ -1378,7 +1384,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kMips64AbsD: - __ abs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); + if (kArchVariant == kMips64r6) { + __ abs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); + } else { + __ dmfc1(kScratchReg, i.InputDoubleRegister(0)); + __ Dins(kScratchReg, zero_reg, 63, 1); + __ dmtc1(kScratchReg, i.OutputDoubleRegister()); + } break; case kMips64NegD: __ Neg_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); break; @@ -1810,19 +1822,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kMips64Peek: { - // The incoming value is 0-based, but we need a 1-based value.
- int reverse_slot = i.InputInt32(0) + 1; + int reverse_slot = i.InputInt32(0); int offset = FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); if (instr->OutputAt(0)->IsFPRegister()) { LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); if (op->representation() == MachineRepresentation::kFloat64) { __ Ldc1(i.OutputDoubleRegister(), MemOperand(fp, offset)); - } else { - DCHECK_EQ(op->representation(), MachineRepresentation::kFloat32); - __ lwc1( + } else if (op->representation() == MachineRepresentation::kFloat32) { + __ Lwc1( i.OutputSingleRegister(0), MemOperand(fp, offset + kLessSignificantWordInDoublewordOffset)); + } else { + DCHECK_EQ(MachineRepresentation::kSimd128, op->representation()); + __ ld_b(i.OutputSimd128Register(), MemOperand(fp, offset)); } } else { __ Ld(i.OutputRegister(0), MemOperand(fp, offset)); @@ -2304,38 +2317,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kMips64F64x2Ceil: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - __ li(kScratchReg2, kRoundToPlusInf); - __ ctcmsa(MSACSR, kScratchReg2); - __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToPlusInf); break; } case kMips64F64x2Floor: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - __ li(kScratchReg2, kRoundToMinusInf); - __ ctcmsa(MSACSR, kScratchReg2); - __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToMinusInf); break; } case kMips64F64x2Trunc: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - __ li(kScratchReg2, kRoundToZero); - __ ctcmsa(MSACSR, kScratchReg2); - __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToZero); break; } case kMips64F64x2NearestInt: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - // kRoundToNearest == 0 - __ ctcmsa(MSACSR, zero_reg); - __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToNearest); break; } case kMips64I64x2ReplaceLane: { @@ -2676,38 +2677,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kMips64F32x4Ceil: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - __ li(kScratchReg2, kRoundToPlusInf); - __ ctcmsa(MSACSR, kScratchReg2); - __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToPlusInf); break; } case kMips64F32x4Floor: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - __ li(kScratchReg2, kRoundToMinusInf); - __ ctcmsa(MSACSR, kScratchReg2); - __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToMinusInf); break; } case kMips64F32x4Trunc: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - __ li(kScratchReg2, kRoundToZero); - __ ctcmsa(MSACSR, kScratchReg2); - __ 
frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToZero); break; } case kMips64F32x4NearestInt: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); - __ cfcmsa(kScratchReg, MSACSR); - // kRoundToNearest == 0 - __ ctcmsa(MSACSR, zero_reg); - __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); - __ ctcmsa(MSACSR, kScratchReg); + __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0), + kRoundToNearest); break; } case kMips64I32x4SConvertF32x4: { @@ -3520,7 +3509,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ sldi_b(dst, i.InputSimd128Register(1), i.InputInt4(2)); break; } - case kMips64S8x16Shuffle: { + case kMips64I8x16Shuffle: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), @@ -3545,7 +3534,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshf_b(dst, src1, src0); break; } - case kMips64S8x16Swizzle: { + case kMips64I8x16Swizzle: { Simd128Register dst = i.OutputSimd128Register(), tbl = i.InputSimd128Register(0), ctl = i.InputSimd128Register(1); @@ -4202,9 +4191,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (call_descriptor->PushArgumentCount()) { - __ Push(kJavaScriptCallArgCountRegister); - } } else { __ StubPrologue(info()->GetOutputStackFrameType()); if (call_descriptor->IsWasmFunctionCall()) { diff --git a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h index fb60316517..577db6347c 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h +++ b/deps/v8/src/compiler/backend/mips64/instruction-codes-mips64.h @@ -342,8 +342,8 @@ namespace compiler { V(Mips64S8x16PackOdd) \ V(Mips64S8x16InterleaveEven) \ V(Mips64S8x16InterleaveOdd) \ - V(Mips64S8x16Shuffle) \ - V(Mips64S8x16Swizzle) \ + V(Mips64I8x16Shuffle) \ + V(Mips64I8x16Swizzle) \ V(Mips64S8x16Concat) \ V(Mips64S8x8Reverse) \ V(Mips64S8x4Reverse) \ diff --git a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc index 347cf577de..caf472bf30 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc @@ -305,8 +305,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64S8x2Reverse: case kMips64S8x4Reverse: case kMips64S8x8Reverse: - case kMips64S8x16Shuffle: - case kMips64S8x16Swizzle: + case kMips64I8x16Shuffle: + case kMips64I8x16Swizzle: case kMips64Sar: case kMips64Seb: case kMips64Seh: diff --git a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc index 16cc2bfa86..2c807b4183 100644 --- a/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc +++ b/deps/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc @@ -1399,35 +1399,40 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) { } } -void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { - Mips64OperandGenerator g(this); - Node* value = node->InputAt(0); - switch (value->opcode()) { +bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) { + DCHECK_NE(node->opcode(), IrOpcode::kPhi); + switch 
(node->opcode()) { // 32-bit operations will write their result in a 64 bit register, // clearing the top 32 bits of the destination register. case IrOpcode::kUint32Div: case IrOpcode::kUint32Mod: - case IrOpcode::kUint32MulHigh: { - Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); - return; - } + case IrOpcode::kUint32MulHigh: + return true; case IrOpcode::kLoad: { - LoadRepresentation load_rep = LoadRepresentationOf(value->op()); + LoadRepresentation load_rep = LoadRepresentationOf(node->op()); if (load_rep.IsUnsigned()) { switch (load_rep.representation()) { case MachineRepresentation::kWord8: case MachineRepresentation::kWord16: case MachineRepresentation::kWord32: - Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); - return; + return true; default: - break; + return false; } } - break; + return false; } default: - break; + return false; + } +} + +void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { + Mips64OperandGenerator g(this); + Node* value = node->InputAt(0); + if (ZeroExtendsWord32ToWord64(value)) { + Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value)); + return; } Emit(kMips64Dext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), g.TempImmediate(0), g.TempImmediate(32)); @@ -1711,7 +1716,7 @@ void InstructionSelector::EmitPrepareResults( Node* node) { Mips64OperandGenerator g(this); - int reverse_slot = 0; + int reverse_slot = 1; for (PushParameter output : *results) { if (!output.location.IsCallerFrameSlot()) continue; // Skip any alignment holes in nodes. @@ -1721,6 +1726,8 @@ void InstructionSelector::EmitPrepareResults( MarkAsFloat32(output.node); } else if (output.location.GetType() == MachineType::Float64()) { MarkAsFloat64(output.node); + } else if (output.location.GetType() == MachineType::Simd128()) { + MarkAsSimd128(output.node); } Emit(kMips64Peek, g.DefineAsRegister(output.node), g.UseImmediate(reverse_slot)); @@ -1747,9 +1754,9 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { case MachineRepresentation::kFloat64: opcode = kMips64Uldc1; break; - case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kWord8: - UNREACHABLE(); + opcode = load_rep.IsUnsigned() ? kMips64Lbu : kMips64Lb; + break; case MachineRepresentation::kWord16: opcode = load_rep.IsUnsigned() ? kMips64Ulhu : kMips64Ulh; break; @@ -1765,6 +1772,7 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) { case MachineRepresentation::kSimd128: opcode = kMips64MsaLd; break; + case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kCompressedPointer: // Fall through. case MachineRepresentation::kCompressed: // Fall through. case MachineRepresentation::kNone: @@ -1799,9 +1807,9 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { case MachineRepresentation::kFloat64: opcode = kMips64Usdc1; break; - case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kWord8: - UNREACHABLE(); + opcode = kMips64Sb; + break; case MachineRepresentation::kWord16: opcode = kMips64Ush; break; @@ -1817,6 +1825,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) { case MachineRepresentation::kSimd128: opcode = kMips64MsaSt; break; + case MachineRepresentation::kBit: // Fall through. case MachineRepresentation::kCompressedPointer: // Fall through. case MachineRepresentation::kCompressed: // Fall through. 
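// The MIPS64 VisitChangeUint32ToUint64 above now consults
// ZeroExtendsWord32ToWord64 before emitting anything: when the 32-bit
// producer already leaves the upper half of the 64-bit destination cleared
// (the cases enumerated in ZeroExtendsWord32ToWord64NoPhis, per its comment),
// the conversion collapses to kArchNop; otherwise it falls back to
// Dext(reg, 0, 32). A host-side illustration of the invariant being
// exploited, not V8 code:
#include <cstdint>

uint64_t ChangeUint32ToUint64(uint32_t value) {
  // When the producer already wrote the value zero-extended, this cast is
  // the whole "conversion" -- no instruction is needed at run time.
  return static_cast<uint64_t>(value);
}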
case MachineRepresentation::kNone: @@ -3073,7 +3082,7 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, } // namespace -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; bool is_swizzle; CanonicalizeShuffle(node, shuffle, &is_swizzle); @@ -3099,7 +3108,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4))); return; } - Emit(kMips64S8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), + Emit(kMips64I8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0), g.UseRegister(input1), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)), @@ -3107,15 +3116,14 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12))); } -void InstructionSelector::VisitS8x16Swizzle(Node* node) { +void InstructionSelector::VisitI8x16Swizzle(Node* node) { Mips64OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register()}; // We don't want input 0 or input 1 to be the same as output, since we will // modify output before doing the calculation. - Emit(kMips64S8x16Swizzle, g.DefineAsRegister(node), + Emit(kMips64I8x16Swizzle, g.DefineAsRegister(node), g.UseUniqueRegister(node->InputAt(0)), - g.UseUniqueRegister(node->InputAt(1)), - arraysize(temps), temps); + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { diff --git a/deps/v8/src/compiler/backend/ppc/OWNERS b/deps/v8/src/compiler/backend/ppc/OWNERS index 6d1a8fc472..6edd45a6ef 100644 --- a/deps/v8/src/compiler/backend/ppc/OWNERS +++ b/deps/v8/src/compiler/backend/ppc/OWNERS @@ -1,4 +1,4 @@ -jyan@ca.ibm.com +junyan@redhat.com joransiu@ca.ibm.com -michael_dawson@ca.ibm.com -miladfar@ca.ibm.com
\ No newline at end of file +midawson@redhat.com +mfarazma@redhat.com diff --git a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc index 9d112495b3..767247b2fd 100644 --- a/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc +++ b/deps/v8/src/compiler/backend/ppc/code-generator-ppc.cc @@ -449,41 +449,42 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr, DCHECK_EQ(LeaveRC, i.OutputRCBit()); \ } while (0) -#define ASSEMBLE_FLOAT_MAX() \ - do { \ - DoubleRegister left_reg = i.InputDoubleRegister(0); \ - DoubleRegister right_reg = i.InputDoubleRegister(1); \ - DoubleRegister result_reg = i.OutputDoubleRegister(); \ - Label check_nan_left, check_zero, return_left, return_right, done; \ - __ fcmpu(left_reg, right_reg); \ - __ bunordered(&check_nan_left); \ - __ beq(&check_zero); \ - __ bge(&return_left); \ - __ b(&return_right); \ - \ - __ bind(&check_zero); \ - __ fcmpu(left_reg, kDoubleRegZero); \ - /* left == right != 0. */ \ - __ bne(&return_left); \ - /* At this point, both left and right are either 0 or -0. */ \ - __ fadd(result_reg, left_reg, right_reg); \ - __ b(&done); \ - \ - __ bind(&check_nan_left); \ - __ fcmpu(left_reg, left_reg); \ - /* left == NaN. */ \ - __ bunordered(&return_left); \ - __ bind(&return_right); \ - if (right_reg != result_reg) { \ - __ fmr(result_reg, right_reg); \ - } \ - __ b(&done); \ - \ - __ bind(&return_left); \ - if (left_reg != result_reg) { \ - __ fmr(result_reg, left_reg); \ - } \ - __ bind(&done); \ +#define ASSEMBLE_FLOAT_MAX() \ + do { \ + DoubleRegister left_reg = i.InputDoubleRegister(0); \ + DoubleRegister right_reg = i.InputDoubleRegister(1); \ + DoubleRegister result_reg = i.OutputDoubleRegister(); \ + Label check_zero, return_left, return_right, return_nan, done; \ + __ fcmpu(left_reg, right_reg); \ + __ bunordered(&return_nan); \ + __ beq(&check_zero); \ + __ bge(&return_left); \ + __ b(&return_right); \ + \ + __ bind(&check_zero); \ + __ fcmpu(left_reg, kDoubleRegZero); \ + /* left == right != 0. */ \ + __ bne(&return_left); \ + /* At this point, both left and right are either 0 or -0. */ \ + __ fadd(result_reg, left_reg, right_reg); \ + __ b(&done); \ + \ + __ bind(&return_nan); \ + /* If left or right are NaN, fadd propagates the appropriate one.*/ \ + __ fadd(result_reg, left_reg, right_reg); \ + __ b(&done); \ + \ + __ bind(&return_right); \ + if (right_reg != result_reg) { \ + __ fmr(result_reg, right_reg); \ + } \ + __ b(&done); \ + \ + __ bind(&return_left); \ + if (left_reg != result_reg) { \ + __ fmr(result_reg, left_reg); \ + } \ + __ bind(&done); \ } while (0) #define ASSEMBLE_FLOAT_MIN() \ @@ -491,9 +492,9 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr, DoubleRegister left_reg = i.InputDoubleRegister(0); \ DoubleRegister right_reg = i.InputDoubleRegister(1); \ DoubleRegister result_reg = i.OutputDoubleRegister(); \ - Label check_nan_left, check_zero, return_left, return_right, done; \ + Label check_zero, return_left, return_right, return_nan, done; \ __ fcmpu(left_reg, right_reg); \ - __ bunordered(&check_nan_left); \ + __ bunordered(&return_nan); \ __ beq(&check_zero); \ __ ble(&return_left); \ __ b(&return_right); \ @@ -515,10 +516,10 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr, __ fneg(result_reg, result_reg); \ __ b(&done); \ \ - __ bind(&check_nan_left); \ - __ fcmpu(left_reg, left_reg); \ - /* left == NaN. 
*/ \ - __ bunordered(&return_left); \ + __ bind(&return_nan); \ + /* If left or right are NaN, fadd propagates the appropriate one.*/ \ + __ fadd(result_reg, left_reg, right_reg); \ + __ b(&done); \ \ __ bind(&return_right); \ if (right_reg != result_reg) { \ @@ -3228,7 +3229,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } - case kPPC_S8x16Shuffle: { + case kPPC_I8x16Shuffle: { Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), src1 = i.InputSimd128Register(1); @@ -3287,7 +3288,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } - case kPPC_S8x16Swizzle: { + case kPPC_I8x16Swizzle: { // Reverse the input to match IBM lane numbering. Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); __ addi(sp, sp, Operand(-16)); @@ -3355,6 +3356,88 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kPPC_S128AndNot: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register src = i.InputSimd128Register(0); + __ vandc(dst, src, i.InputSimd128Register(1)); + break; + } + case kPPC_F64x2Div: { + __ xvdivdp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } +#define F64X2_MIN_MAX_NAN(result) \ + Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); \ + __ xvcmpeqdp(tempFPReg1, i.InputSimd128Register(0), \ + i.InputSimd128Register(0)); \ + __ vsel(result, i.InputSimd128Register(0), result, tempFPReg1); \ + __ xvcmpeqdp(tempFPReg1, i.InputSimd128Register(1), \ + i.InputSimd128Register(1)); \ + __ vsel(i.OutputSimd128Register(), i.InputSimd128Register(1), result, \ + tempFPReg1); + case kPPC_F64x2Min: { + __ xvmindp(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + // We need to check if an input is NAN and preserve it. + F64X2_MIN_MAX_NAN(kScratchDoubleReg) + break; + } + case kPPC_F64x2Max: { + __ xvmaxdp(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + // We need to check if an input is NAN and preserve it. 
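// The F64X2_MIN_MAX_NAN macro defined above repairs xvmindp/xvmaxdp lane by
// lane: xvcmpeqdp(x, x) yields all-ones only for non-NaN lanes (a NaN never
// compares equal to itself), so each vsel keeps the computed min/max for
// ordinary lanes and re-injects the original operand wherever it was NaN.
// The reworked scalar ASSEMBLE_FLOAT_MAX/MIN macros earlier use the same
// idea, branching to one return_nan label and letting fadd propagate a NaN
// input. A scalar C++ analogue of the lane selection, illustrative only:
#include <cmath>

double F64MinPreservingNaN(double a, double b) {
  double result = (a < b) ? a : b;  // xvmindp; value unspecified when a or b is NaN
  if (std::isnan(a)) result = a;    // first vsel: preserve NaN from operand 0
  if (std::isnan(b)) result = b;    // second vsel: operand 1's NaN wins last
  return result;
}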
+ F64X2_MIN_MAX_NAN(kScratchDoubleReg) + break; + } +#undef F64X2_MIN_MAX_NAN + case kPPC_F32x4Div: { + __ xvdivsp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F32x4Min: { + __ vminfp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F32x4Max: { + __ vmaxfp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F64x2Ceil: { + __ xvrdpip(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F64x2Floor: { + __ xvrdpim(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F64x2Trunc: { + __ xvrdpiz(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F64x2NearestInt: { + __ xvrdpi(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F32x4Ceil: { + __ xvrspip(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F32x4Floor: { + __ xvrspim(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F32x4Trunc: { + __ xvrspiz(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } + case kPPC_F32x4NearestInt: { + __ xvrspi(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kPPC_StoreCompressTagged: { ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX); break; @@ -3631,9 +3714,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (call_descriptor->PushArgumentCount()) { - __ Push(kJavaScriptCallArgCountRegister); - } } else { StackFrame::Type type = info()->GetOutputStackFrameType(); // TODO(mbrandy): Detect cases where ip is the entrypoint (for diff --git a/deps/v8/src/compiler/backend/ppc/instruction-codes-ppc.h b/deps/v8/src/compiler/backend/ppc/instruction-codes-ppc.h index 9dc7bf49d0..fb5151ebd4 100644 --- a/deps/v8/src/compiler/backend/ppc/instruction-codes-ppc.h +++ b/deps/v8/src/compiler/backend/ppc/instruction-codes-ppc.h @@ -207,6 +207,13 @@ namespace compiler { V(PPC_F64x2Sqrt) \ V(PPC_F64x2Qfma) \ V(PPC_F64x2Qfms) \ + V(PPC_F64x2Div) \ + V(PPC_F64x2Min) \ + V(PPC_F64x2Max) \ + V(PPC_F64x2Ceil) \ + V(PPC_F64x2Floor) \ + V(PPC_F64x2Trunc) \ + V(PPC_F64x2NearestInt) \ V(PPC_F32x4Splat) \ V(PPC_F32x4ExtractLane) \ V(PPC_F32x4ReplaceLane) \ @@ -225,6 +232,13 @@ namespace compiler { V(PPC_F32x4Sqrt) \ V(PPC_F32x4SConvertI32x4) \ V(PPC_F32x4UConvertI32x4) \ + V(PPC_F32x4Div) \ + V(PPC_F32x4Min) \ + V(PPC_F32x4Max) \ + V(PPC_F32x4Ceil) \ + V(PPC_F32x4Floor) \ + V(PPC_F32x4Trunc) \ + V(PPC_F32x4NearestInt) \ V(PPC_I64x2Splat) \ V(PPC_I64x2ExtractLane) \ V(PPC_I64x2ReplaceLane) \ @@ -338,8 +352,8 @@ namespace compiler { V(PPC_I8x16AddSaturateU) \ V(PPC_I8x16SubSaturateU) \ V(PPC_I8x16RoundingAverageU) \ - V(PPC_S8x16Shuffle) \ - V(PPC_S8x16Swizzle) \ + V(PPC_I8x16Shuffle) \ + V(PPC_I8x16Swizzle) \ V(PPC_V64x2AnyTrue) \ V(PPC_V32x4AnyTrue) \ V(PPC_V16x8AnyTrue) \ @@ -354,6 +368,7 @@ namespace compiler { V(PPC_S128Zero) \ V(PPC_S128Not) \ V(PPC_S128Select) \ + V(PPC_S128AndNot) \ V(PPC_StoreCompressTagged) \ V(PPC_LoadDecompressTaggedSigned) \ V(PPC_LoadDecompressTaggedPointer) \ diff --git a/deps/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc b/deps/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc index 0493d81dd7..8beaa8539c 100644 --- a/deps/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc +++ 
b/deps/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc @@ -130,6 +130,13 @@ int InstructionScheduler::GetTargetInstructionFlags( case kPPC_F64x2Sqrt: case kPPC_F64x2Qfma: case kPPC_F64x2Qfms: + case kPPC_F64x2Div: + case kPPC_F64x2Min: + case kPPC_F64x2Max: + case kPPC_F64x2Ceil: + case kPPC_F64x2Floor: + case kPPC_F64x2Trunc: + case kPPC_F64x2NearestInt: case kPPC_F32x4Splat: case kPPC_F32x4ExtractLane: case kPPC_F32x4ReplaceLane: @@ -150,6 +157,13 @@ int InstructionScheduler::GetTargetInstructionFlags( case kPPC_F32x4UConvertI32x4: case kPPC_F32x4Qfma: case kPPC_F32x4Qfms: + case kPPC_F32x4Div: + case kPPC_F32x4Min: + case kPPC_F32x4Max: + case kPPC_F32x4Ceil: + case kPPC_F32x4Floor: + case kPPC_F32x4Trunc: + case kPPC_F32x4NearestInt: case kPPC_I64x2Splat: case kPPC_I64x2ExtractLane: case kPPC_I64x2ReplaceLane: @@ -261,8 +275,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kPPC_I8x16AddSaturateU: case kPPC_I8x16SubSaturateU: case kPPC_I8x16RoundingAverageU: - case kPPC_S8x16Shuffle: - case kPPC_S8x16Swizzle: + case kPPC_I8x16Shuffle: + case kPPC_I8x16Swizzle: case kPPC_V64x2AnyTrue: case kPPC_V32x4AnyTrue: case kPPC_V16x8AnyTrue: @@ -277,6 +291,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kPPC_S128Zero: case kPPC_S128Not: case kPPC_S128Select: + case kPPC_S128AndNot: return kNoOpcodeFlags; case kPPC_LoadWordS8: diff --git a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc index 507542e28c..0c61821cf5 100644 --- a/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc +++ b/deps/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc @@ -1200,6 +1200,10 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) { VisitRR(this, kPPC_ExtendSignWord32, node); } +bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) { + UNIMPLEMENTED(); +} + void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { // TODO(mbrandy): inspect input to see if nop is appropriate. 
VisitRR(this, kPPC_Uint32ToUint64, node); @@ -2156,6 +2160,9 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F64x2Ne) \ V(F64x2Le) \ V(F64x2Lt) \ + V(F64x2Div) \ + V(F64x2Min) \ + V(F64x2Max) \ V(F32x4Add) \ V(F32x4AddHoriz) \ V(F32x4Sub) \ @@ -2164,6 +2171,9 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4Ne) \ V(F32x4Lt) \ V(F32x4Le) \ + V(F32x4Div) \ + V(F32x4Min) \ + V(F32x4Max) \ V(I64x2Add) \ V(I64x2Sub) \ V(I64x2Mul) \ @@ -2222,15 +2232,20 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I8x16AddSaturateU) \ V(I8x16SubSaturateU) \ V(I8x16RoundingAverageU) \ + V(I8x16Swizzle) \ V(S128And) \ V(S128Or) \ V(S128Xor) \ - V(S8x16Swizzle) + V(S128AndNot) #define SIMD_UNOP_LIST(V) \ V(F64x2Abs) \ V(F64x2Neg) \ V(F64x2Sqrt) \ + V(F64x2Ceil) \ + V(F64x2Floor) \ + V(F64x2Trunc) \ + V(F64x2NearestInt) \ V(F32x4Abs) \ V(F32x4Neg) \ V(F32x4RecipApprox) \ @@ -2238,6 +2253,10 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4Sqrt) \ V(F32x4SConvertI32x4) \ V(F32x4UConvertI32x4) \ + V(F32x4Ceil) \ + V(F32x4Floor) \ + V(F32x4Trunc) \ + V(F32x4NearestInt) \ V(I64x2Neg) \ V(I32x4Neg) \ V(I32x4Abs) \ @@ -2361,7 +2380,7 @@ SIMD_BOOL_LIST(SIMD_VISIT_BOOL) #undef SIMD_BOOL_LIST #undef SIMD_TYPES -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; bool is_swizzle; CanonicalizeShuffle(node, shuffle, &is_swizzle); @@ -2378,7 +2397,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ? max_index - current_index : total_lane_count - current_index + max_index); } - Emit(kPPC_S8x16Shuffle, g.DefineAsRegister(node), g.UseUniqueRegister(input0), + Emit(kPPC_I8x16Shuffle, g.DefineAsRegister(node), g.UseUniqueRegister(input0), g.UseUniqueRegister(input1), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_remapped)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_remapped + 4)), @@ -2400,7 +2419,11 @@ void InstructionSelector::VisitS128Select(Node* node) { void InstructionSelector::VisitS128Const(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS128AndNot(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); } + +void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::EmitPrepareResults( ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor, @@ -2427,19 +2450,15 @@ void InstructionSelector::EmitPrepareResults( } } -void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Pmax(Node* node) { 
UNIMPLEMENTED(); } // static MachineOperatorBuilder::Flags diff --git a/deps/v8/src/compiler/backend/register-allocator.cc b/deps/v8/src/compiler/backend/register-allocator.cc index b3b40281b9..30724647c6 100644 --- a/deps/v8/src/compiler/backend/register-allocator.cc +++ b/deps/v8/src/compiler/backend/register-allocator.cc @@ -300,7 +300,6 @@ LiveRange::LiveRange(int relative_id, MachineRepresentation rep, next_(nullptr), current_interval_(nullptr), last_processed_use_(nullptr), - splitting_pointer_(nullptr), current_hint_position_(nullptr) { DCHECK(AllocatedOperand::IsSupportedRepresentation(rep)); bits_ = AssignedRegisterField::encode(kUnassignedRegister) | @@ -597,10 +596,7 @@ UsePosition* LiveRange::DetachAt(LifetimePosition position, LiveRange* result, // Find the last use position before the split and the first use // position after it. - UsePosition* use_after = - splitting_pointer_ == nullptr || splitting_pointer_->pos() > position - ? first_pos() - : splitting_pointer_; + UsePosition* use_after = first_pos(); UsePosition* use_before = nullptr; if (split_at_start) { // The split position coincides with the beginning of a use interval (the @@ -819,24 +815,16 @@ TopLevelLiveRange::TopLevelLiveRange(int vreg, MachineRepresentation rep) : LiveRange(0, rep, this), vreg_(vreg), last_child_id_(0), - splintered_from_(nullptr), spill_operand_(nullptr), spill_move_insertion_locations_(nullptr), spilled_in_deferred_blocks_(false), has_preassigned_slot_(false), spill_start_index_(kMaxInt), last_pos_(nullptr), - last_child_covers_(this), - splinter_(nullptr) { + last_child_covers_(this) { bits_ |= SpillTypeField::encode(SpillType::kNoSpillType); } -#if DEBUG -int TopLevelLiveRange::debug_virt_reg() const { - return IsSplinter() ? splintered_from()->vreg() : vreg(); -} -#endif - void TopLevelLiveRange::RecordSpillLocation(Zone* zone, int gap_index, InstructionOperand* operand) { DCHECK(HasNoSpillType()); @@ -925,157 +913,6 @@ AllocatedOperand TopLevelLiveRange::GetSpillRangeOperand() const { return AllocatedOperand(LocationOperand::STACK_SLOT, representation(), index); } -void TopLevelLiveRange::Splinter(LifetimePosition start, LifetimePosition end, - Zone* zone) { - DCHECK(start != Start() || end != End()); - DCHECK(start < end); - - TopLevelLiveRange splinter_temp(-1, representation()); - UsePosition* last_in_splinter = nullptr; - // Live ranges defined in deferred blocks stay in deferred blocks, so we - // don't need to splinter them. That means that start should always be - // after the beginning of the range. - DCHECK(start > Start()); - - if (end >= End()) { - DCHECK(start > Start()); - DetachAt(start, &splinter_temp, zone, ConnectHints); - next_ = nullptr; - } else { - DCHECK(start < End() && Start() < end); - - const int kInvalidId = std::numeric_limits<int>::max(); - - UsePosition* last = DetachAt(start, &splinter_temp, zone, ConnectHints); - - LiveRange end_part(kInvalidId, this->representation(), nullptr); - // The last chunk exits the deferred region, and we don't want to connect - // hints here, because the non-deferred region shouldn't be affected - // by allocation decisions on the deferred path. - last_in_splinter = - splinter_temp.DetachAt(end, &end_part, zone, DoNotConnectHints); - - next_ = end_part.next_; - last_interval_->set_next(end_part.first_interval_); - // The next splinter will happen either at or after the current interval. - // We can optimize DetachAt by setting current_interval_ accordingly, - // which will then be picked up by FirstSearchIntervalForPosition. 
- current_interval_ = last_interval_; - last_interval_ = end_part.last_interval_; - - if (first_pos_ == nullptr) { - first_pos_ = end_part.first_pos_; - } else { - splitting_pointer_ = last; - if (last != nullptr) last->set_next(end_part.first_pos_); - } - } - - if (splinter()->IsEmpty()) { - splinter()->first_interval_ = splinter_temp.first_interval_; - splinter()->last_interval_ = splinter_temp.last_interval_; - } else { - splinter()->last_interval_->set_next(splinter_temp.first_interval_); - splinter()->last_interval_ = splinter_temp.last_interval_; - } - if (splinter()->first_pos() == nullptr) { - splinter()->first_pos_ = splinter_temp.first_pos_; - } else { - splinter()->last_pos_->set_next(splinter_temp.first_pos_); - } - if (last_in_splinter != nullptr) { - splinter()->last_pos_ = last_in_splinter; - } else { - if (splinter()->first_pos() != nullptr && - splinter()->last_pos_ == nullptr) { - splinter()->last_pos_ = splinter()->first_pos(); - for (UsePosition* pos = splinter()->first_pos(); pos != nullptr; - pos = pos->next()) { - splinter()->last_pos_ = pos; - } - } - } -#if DEBUG - Verify(); - splinter()->Verify(); -#endif -} - -void TopLevelLiveRange::SetSplinteredFrom(TopLevelLiveRange* splinter_parent) { - splintered_from_ = splinter_parent; - if (!HasSpillOperand() && splinter_parent->spill_range_ != nullptr) { - SetSpillRange(splinter_parent->spill_range_); - } -} - -void TopLevelLiveRange::UpdateSpillRangePostMerge(TopLevelLiveRange* merged) { - DCHECK(merged->TopLevel() == this); - - if (HasNoSpillType() && merged->HasSpillRange()) { - set_spill_type(merged->spill_type()); - DCHECK_LT(0, GetSpillRange()->live_ranges().size()); - merged->spill_range_ = nullptr; - merged->bits_ = - SpillTypeField::update(merged->bits_, SpillType::kNoSpillType); - } -} - -void TopLevelLiveRange::Merge(TopLevelLiveRange* other, Zone* zone) { - DCHECK(Start() < other->Start()); - DCHECK(other->splintered_from() == this); - - LiveRange* first = this; - LiveRange* second = other; - DCHECK(first->Start() < second->Start()); - while (first != nullptr && second != nullptr) { - DCHECK(first != second); - // Make sure the ranges are in order each time we iterate. - if (second->Start() < first->Start()) { - LiveRange* tmp = second; - second = first; - first = tmp; - continue; - } - - if (first->End() <= second->Start()) { - if (first->next() == nullptr || - first->next()->Start() > second->Start()) { - // First is in order before second. - LiveRange* temp = first->next(); - first->next_ = second; - first = temp; - } else { - // First is in order before its successor (or second), so advance first. - first = first->next(); - } - continue; - } - - DCHECK(first->Start() < second->Start()); - // If first and second intersect, split first. 
- if (first->Start() < second->End() && second->Start() < first->End()) { - LiveRange* temp = first->SplitAt(second->Start(), zone); - CHECK(temp != first); - temp->set_spilled(first->spilled()); - if (!temp->spilled()) - temp->set_assigned_register(first->assigned_register()); - - first->next_ = second; - first = temp; - continue; - } - DCHECK(first->End() <= second->Start()); - } - - TopLevel()->UpdateParentForAllChildren(TopLevel()); - TopLevel()->UpdateSpillRangePostMerge(other); - TopLevel()->register_slot_use(other->slot_use_kind()); - -#if DEBUG - Verify(); -#endif -} - void TopLevelLiveRange::VerifyChildrenInOrder() const { LifetimePosition last_end = End(); for (const LiveRange* child = this->next(); child != nullptr; @@ -1271,8 +1108,7 @@ void PrintBlockRow(std::ostream& os, const InstructionBlocks& blocks) { void LinearScanAllocator::PrintRangeRow(std::ostream& os, const TopLevelLiveRange* toplevel) { int position = 0; - os << std::setw(3) << toplevel->vreg() - << (toplevel->IsSplinter() ? "s:" : ": "); + os << std::setw(3) << toplevel->vreg() << ": "; const char* kind_string; switch (toplevel->spill_type()) { @@ -1340,10 +1176,9 @@ SpillRange::SpillRange(TopLevelLiveRange* parent, Zone* zone) : live_ranges_(zone), assigned_slot_(kUnassignedSlot), byte_width_(ByteWidthForStackSlot(parent->representation())) { - // Spill ranges are created for top level, non-splintered ranges. This is so - // that, when merging decisions are made, we consider the full extent of the - // virtual register, and avoid clobbering it. - DCHECK(!parent->IsSplinter()); + // Spill ranges are created for top level. This is so that, when merging + // decisions are made, we consider the full extent of the virtual register, + // and avoid clobbering it. UseInterval* result = nullptr; UseInterval* node = nullptr; // Copy the intervals for all ranges. @@ -1646,32 +1481,16 @@ SpillRange* TopTierRegisterAllocationData::AssignSpillRangeToLiveRange( SpillRange* spill_range = range->GetAllocatedSpillRange(); if (spill_range == nullptr) { - DCHECK(!range->IsSplinter()); spill_range = allocation_zone()->New<SpillRange>(range, allocation_zone()); } if (spill_mode == SpillMode::kSpillDeferred && (range->spill_type() != SpillType::kSpillRange)) { - DCHECK(is_turbo_control_flow_aware_allocation()); range->set_spill_type(SpillType::kDeferredSpillRange); } else { range->set_spill_type(SpillType::kSpillRange); } - int spill_range_index = - range->IsSplinter() ? range->splintered_from()->vreg() : range->vreg(); - - spill_ranges()[spill_range_index] = spill_range; - - return spill_range; -} - -SpillRange* TopTierRegisterAllocationData::CreateSpillRangeForLiveRange( - TopLevelLiveRange* range) { - DCHECK(is_turbo_preprocess_ranges()); - DCHECK(!range->HasSpillOperand()); - DCHECK(!range->IsSplinter()); - SpillRange* spill_range = - allocation_zone()->New<SpillRange>(range, allocation_zone()); + spill_ranges()[range->vreg()] = spill_range; return spill_range; } @@ -2337,15 +2156,10 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block, int vreg = unalloc->virtual_register(); live->Add(vreg); if (unalloc->HasSlotPolicy()) { - if (data()->is_turbo_control_flow_aware_allocation()) { - data()->GetOrCreateLiveRangeFor(vreg)->register_slot_use( - block->IsDeferred() - ? 
TopLevelLiveRange::SlotUseKind::kDeferredSlotUse - : TopLevelLiveRange::SlotUseKind::kGeneralSlotUse); - } else { - data()->GetOrCreateLiveRangeFor(vreg)->register_slot_use( - TopLevelLiveRange::SlotUseKind::kGeneralSlotUse); - } + data()->GetOrCreateLiveRangeFor(vreg)->register_slot_use( + block->IsDeferred() + ? TopLevelLiveRange::SlotUseKind::kDeferredSlotUse + : TopLevelLiveRange::SlotUseKind::kGeneralSlotUse); } } Use(block_start_position, use_pos, input, spill_mode); @@ -2917,12 +2731,7 @@ void RegisterAllocator::SplitAndSpillRangesDefinedByMemoryOperand() { next_pos = next_pos.NextStart(); } - // With splinters, we can be more strict and skip over positions - // not strictly needing registers. - UsePosition* pos = - range->IsSplinter() - ? range->NextRegisterPosition(next_pos) - : range->NextUsePositionRegisterIsBeneficial(next_pos); + UsePosition* pos = range->NextUsePositionRegisterIsBeneficial(next_pos); // If the range already has a spill operand and it doesn't need a // register immediately, split it and spill the first part of the range. if (pos == nullptr) { @@ -3025,66 +2834,42 @@ LifetimePosition RegisterAllocator::FindOptimalSpillingPos( block->IsLoopHeader() ? block : GetContainingLoop(code(), block); if (loop_header == nullptr) return pos; - if (data()->is_turbo_control_flow_aware_allocation()) { - while (loop_header != nullptr) { - // We are going to spill live range inside the loop. - // If possible try to move spilling position backwards to loop header. - // This will reduce number of memory moves on the back edge. - LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex( - loop_header->first_instruction_index()); - // Stop if we moved to a loop header before the value is defined or - // at the define position that is not beneficial to spill. - if (range->TopLevel()->Start() > loop_start || - (range->TopLevel()->Start() == loop_start && - range->TopLevel()->SpillAtLoopHeaderNotBeneficial())) - return pos; - - LiveRange* live_at_header = range->TopLevel()->GetChildCovers(loop_start); - - if (live_at_header != nullptr && !live_at_header->spilled()) { - for (LiveRange* check_use = live_at_header; - check_use != nullptr && check_use->Start() < pos; - check_use = check_use->next()) { - // If we find a use for which spilling is detrimental, don't spill - // at the loop header - UsePosition* next_use = - check_use->NextUsePositionSpillDetrimental(loop_start); - // UsePosition at the end of a UseInterval may - // have the same value as the start of next range. - if (next_use != nullptr && next_use->pos() <= pos) { - return pos; - } + while (loop_header != nullptr) { + // We are going to spill live range inside the loop. + // If possible try to move spilling position backwards to loop header. + // This will reduce number of memory moves on the back edge. + LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex( + loop_header->first_instruction_index()); + // Stop if we moved to a loop header before the value is defined or + // at the define position that is not beneficial to spill. 
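// Shape of the spill-hoisting heuristic in FindOptimalSpillingPos below,
// reduced to its control flow. ContainingLoopHeader, StartOf and
// HasDetrimentalUseBetween are hypothetical stand-ins for the V8 helpers,
// so this is a sketch of the idea rather than the real implementation:
struct Block;
Block* ContainingLoopHeader(Block* block);        // assumed helper
int StartOf(Block* header);                       // assumed helper
bool HasDetrimentalUseBetween(int from, int to);  // assumed helper

int FindSpillPosition(int range_start, int pos, Block* loop_header) {
  while (loop_header != nullptr) {
    int loop_start = StartOf(loop_header);
    // Never hoist the spill above the definition of the value.
    if (range_start > loop_start) return pos;
    // A use needing the register between header and pos blocks hoisting.
    if (HasDetrimentalUseBetween(loop_start, pos)) return pos;
    pos = loop_start;  // spill at the loop header, not inside the loop body
    loop_header = ContainingLoopHeader(loop_header);  // try the outer loop
  }
  return pos;
}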
+ if (range->TopLevel()->Start() > loop_start || + (range->TopLevel()->Start() == loop_start && + range->TopLevel()->SpillAtLoopHeaderNotBeneficial())) + return pos; + + LiveRange* live_at_header = range->TopLevel()->GetChildCovers(loop_start); + + if (live_at_header != nullptr && !live_at_header->spilled()) { + for (LiveRange* check_use = live_at_header; + check_use != nullptr && check_use->Start() < pos; + check_use = check_use->next()) { + // If we find a use for which spilling is detrimental, don't spill + // at the loop header + UsePosition* next_use = + check_use->NextUsePositionSpillDetrimental(loop_start); + // UsePosition at the end of a UseInterval may + // have the same value as the start of next range. + if (next_use != nullptr && next_use->pos() <= pos) { + return pos; } - // No register beneficial use inside the loop before the pos. - *begin_spill_out = live_at_header; - pos = loop_start; } - - // Try hoisting out to an outer loop. - loop_header = GetContainingLoop(code(), loop_header); + // No register beneficial use inside the loop before the pos. + *begin_spill_out = live_at_header; + pos = loop_start; } - } else { - const UsePosition* prev_use = - range->PreviousUsePositionRegisterIsBeneficial(pos); - - while (loop_header != nullptr) { - // We are going to spill live range inside the loop. - // If possible try to move spilling position backwards to loop header - // inside the current range. This will reduce number of memory moves on - // the back edge. - LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex( - loop_header->first_instruction_index()); - - if (range->Covers(loop_start)) { - if (prev_use == nullptr || prev_use->pos() < loop_start) { - // No register beneficial use inside the loop before the pos. - pos = loop_start; - } - } - // Try hoisting out to an outer loop. - loop_header = GetContainingLoop(code(), loop_header); - } + // Try hoisting out to an outer loop. + loop_header = GetContainingLoop(code(), loop_header); } return pos; } @@ -3761,9 +3546,7 @@ void LinearScanAllocator::AllocateRegisters() { // those. Not only does this produce a potentially bad assignment, it also // breaks with the invariant that we undo spills that happen in deferred code // when crossing a deferred/non-deferred boundary. - while (!unhandled_live_ranges().empty() || - (data()->is_turbo_control_flow_aware_allocation() && - last_block < max_blocks)) { + while (!unhandled_live_ranges().empty() || last_block < max_blocks) { data()->tick_counter()->TickAndMaybeEnterSafepoint(); LiveRange* current = unhandled_live_ranges().empty() ? nullptr @@ -3773,160 +3556,155 @@ void LinearScanAllocator::AllocateRegisters() { #ifdef DEBUG allocation_finger_ = position; #endif - if (data()->is_turbo_control_flow_aware_allocation()) { - // Splintering is not supported. - CHECK(!data()->is_turbo_preprocess_ranges()); - // Check whether we just moved across a block boundary. This will trigger - // for the first range that is past the current boundary. - if (position >= next_block_boundary) { - TRACE("Processing boundary at %d leaving %d\n", - next_block_boundary.value(), last_block.ToInt()); - - // Forward state to before block boundary - LifetimePosition end_of_block = next_block_boundary.PrevStart().End(); - ForwardStateTo(end_of_block); - - // Remember this state. - InstructionBlock* current_block = data()->code()->GetInstructionBlock( - next_block_boundary.ToInstructionIndex()); - - // Store current spill state (as the state at end of block). 
For - // simplicity, we store the active ranges, e.g., the live ranges that - // are not spilled. - data()->RememberSpillState(last_block, active_live_ranges()); - - // Only reset the state if this was not a direct fallthrough. Otherwise - // control flow resolution will get confused (it does not expect changes - // across fallthrough edges.). - bool fallthrough = (current_block->PredecessorCount() == 1) && - current_block->predecessors()[0].IsNext( - current_block->rpo_number()); - - // When crossing a deferred/non-deferred boundary, we have to load or - // remove the deferred fixed ranges from inactive. - if ((spill_mode == SpillMode::kSpillDeferred) != - current_block->IsDeferred()) { - // Update spill mode. - spill_mode = current_block->IsDeferred() - ? SpillMode::kSpillDeferred - : SpillMode::kSpillAtDefinition; - - ForwardStateTo(next_block_boundary); + // Check whether we just moved across a block boundary. This will trigger + // for the first range that is past the current boundary. + if (position >= next_block_boundary) { + TRACE("Processing boundary at %d leaving %d\n", + next_block_boundary.value(), last_block.ToInt()); + + // Forward state to before block boundary + LifetimePosition end_of_block = next_block_boundary.PrevStart().End(); + ForwardStateTo(end_of_block); + + // Remember this state. + InstructionBlock* current_block = data()->code()->GetInstructionBlock( + next_block_boundary.ToInstructionIndex()); + + // Store current spill state (as the state at end of block). For + // simplicity, we store the active ranges, e.g., the live ranges that + // are not spilled. + data()->RememberSpillState(last_block, active_live_ranges()); + + // Only reset the state if this was not a direct fallthrough. Otherwise + // control flow resolution will get confused (it does not expect changes + // across fallthrough edges.). + bool fallthrough = + (current_block->PredecessorCount() == 1) && + current_block->predecessors()[0].IsNext(current_block->rpo_number()); + + // When crossing a deferred/non-deferred boundary, we have to load or + // remove the deferred fixed ranges from inactive. + if ((spill_mode == SpillMode::kSpillDeferred) != + current_block->IsDeferred()) { + // Update spill mode. + spill_mode = current_block->IsDeferred() + ? SpillMode::kSpillDeferred + : SpillMode::kSpillAtDefinition; + + ForwardStateTo(next_block_boundary); #ifdef DEBUG - // Allow allocation at current position. - allocation_finger_ = next_block_boundary; + // Allow allocation at current position. + allocation_finger_ = next_block_boundary; #endif - UpdateDeferredFixedRanges(spill_mode, current_block); - } + UpdateDeferredFixedRanges(spill_mode, current_block); + } - // Allocation relies on the fact that each non-deferred block has at - // least one non-deferred predecessor. Check this invariant here. - DCHECK_IMPLIES(!current_block->IsDeferred(), - HasNonDeferredPredecessor(current_block)); + // Allocation relies on the fact that each non-deferred block has at + // least one non-deferred predecessor. Check this invariant here. + DCHECK_IMPLIES(!current_block->IsDeferred(), + HasNonDeferredPredecessor(current_block)); - if (!fallthrough) { + if (!fallthrough) { #ifdef DEBUG - // Allow allocation at current position. - allocation_finger_ = next_block_boundary; + // Allow allocation at current position. + allocation_finger_ = next_block_boundary; #endif - // We are currently at next_block_boundary - 1. Move the state to the - // actual block boundary position. 
In particular, we have to - // reactivate inactive ranges so that they get rescheduled for - // allocation if they were not live at the predecessors. - ForwardStateTo(next_block_boundary); - - RangeWithRegisterSet to_be_live(data()->allocation_zone()); - - // If we end up deciding to use the state of the immediate - // predecessor, it is better not to perform a change. It would lead to - // the same outcome anyway. - // This may never happen on boundaries between deferred and - // non-deferred code, as we rely on explicit respill to ensure we - // spill at definition. - bool no_change_required = false; - - auto pick_state_from = [this, current_block]( - RpoNumber pred, - RangeWithRegisterSet* to_be_live) -> bool { - TRACE("Using information from B%d\n", pred.ToInt()); - // If this is a fall-through that is not across a deferred - // boundary, there is nothing to do. - bool is_noop = pred.IsNext(current_block->rpo_number()); - if (!is_noop) { - auto& spill_state = data()->GetSpillState(pred); - TRACE("Not a fallthrough. Adding %zu elements...\n", - spill_state.size()); - LifetimePosition pred_end = - LifetimePosition::GapFromInstructionIndex( - this->code()->InstructionBlockAt(pred)->code_end()); - for (const auto range : spill_state) { - // Filter out ranges that were split or had their register - // stolen by backwards working spill heuristics. These have - // been spilled after the fact, so ignore them. - if (range->End() < pred_end || !range->HasRegisterAssigned()) - continue; - to_be_live->emplace(range); - } - } - return is_noop; - }; - - // Multiple cases here: - // 1) We have a single predecessor => this is a control flow split, so - // just restore the predecessor state. - // 2) We have two predecessors => this is a conditional, so break ties - // based on what to do based on forward uses, trying to benefit - // the same branch if in doubt (make one path fast). - // 3) We have many predecessors => this is a switch. Compute union - // based on majority, break ties by looking forward. - if (current_block->PredecessorCount() == 1) { - TRACE("Single predecessor for B%d\n", - current_block->rpo_number().ToInt()); - no_change_required = - pick_state_from(current_block->predecessors()[0], &to_be_live); - } else if (current_block->PredecessorCount() == 2) { - TRACE("Two predecessors for B%d\n", - current_block->rpo_number().ToInt()); - // If one of the branches does not contribute any information, - // e.g. because it is deferred or a back edge, we can short cut - // here right away. - RpoNumber chosen_predecessor = RpoNumber::Invalid(); - if (!ConsiderBlockForControlFlow( - current_block, current_block->predecessors()[0])) { - chosen_predecessor = current_block->predecessors()[1]; - } else if (!ConsiderBlockForControlFlow( - current_block, current_block->predecessors()[1])) { - chosen_predecessor = current_block->predecessors()[0]; - } else { - chosen_predecessor = ChooseOneOfTwoPredecessorStates( - current_block, next_block_boundary); + // We are currently at next_block_boundary - 1. Move the state to the + // actual block boundary position. In particular, we have to + // reactivate inactive ranges so that they get rescheduled for + // allocation if they were not live at the predecessors. + ForwardStateTo(next_block_boundary); + + RangeWithRegisterSet to_be_live(data()->allocation_zone()); + + // If we end up deciding to use the state of the immediate + // predecessor, it is better not to perform a change. It would lead to + // the same outcome anyway. 
+ // This may never happen on boundaries between deferred and + // non-deferred code, as we rely on explicit respill to ensure we + // spill at definition. + bool no_change_required = false; + + auto pick_state_from = [this, current_block]( + RpoNumber pred, + RangeWithRegisterSet* to_be_live) -> bool { + TRACE("Using information from B%d\n", pred.ToInt()); + // If this is a fall-through that is not across a deferred + // boundary, there is nothing to do. + bool is_noop = pred.IsNext(current_block->rpo_number()); + if (!is_noop) { + auto& spill_state = data()->GetSpillState(pred); + TRACE("Not a fallthrough. Adding %zu elements...\n", + spill_state.size()); + LifetimePosition pred_end = + LifetimePosition::GapFromInstructionIndex( + this->code()->InstructionBlockAt(pred)->code_end()); + for (const auto range : spill_state) { + // Filter out ranges that were split or had their register + // stolen by backwards working spill heuristics. These have + // been spilled after the fact, so ignore them. + if (range->End() < pred_end || !range->HasRegisterAssigned()) + continue; + to_be_live->emplace(range); } - no_change_required = - pick_state_from(chosen_predecessor, &to_be_live); - + } + return is_noop; + }; + + // Multiple cases here: + // 1) We have a single predecessor => this is a control flow split, so + // just restore the predecessor state. + // 2) We have two predecessors => this is a conditional, so break ties + // based on what to do based on forward uses, trying to benefit + // the same branch if in doubt (make one path fast). + // 3) We have many predecessors => this is a switch. Compute union + // based on majority, break ties by looking forward. + if (current_block->PredecessorCount() == 1) { + TRACE("Single predecessor for B%d\n", + current_block->rpo_number().ToInt()); + no_change_required = + pick_state_from(current_block->predecessors()[0], &to_be_live); + } else if (current_block->PredecessorCount() == 2) { + TRACE("Two predecessors for B%d\n", + current_block->rpo_number().ToInt()); + // If one of the branches does not contribute any information, + // e.g. because it is deferred or a back edge, we can short cut + // here right away. + RpoNumber chosen_predecessor = RpoNumber::Invalid(); + if (!ConsiderBlockForControlFlow(current_block, + current_block->predecessors()[0])) { + chosen_predecessor = current_block->predecessors()[1]; + } else if (!ConsiderBlockForControlFlow( + current_block, current_block->predecessors()[1])) { + chosen_predecessor = current_block->predecessors()[0]; } else { - // Merge at the end of, e.g., a switch. - ComputeStateFromManyPredecessors(current_block, &to_be_live); + chosen_predecessor = ChooseOneOfTwoPredecessorStates( + current_block, next_block_boundary); } + no_change_required = pick_state_from(chosen_predecessor, &to_be_live); - if (!no_change_required) { - SpillNotLiveRanges(&to_be_live, next_block_boundary, spill_mode); - ReloadLiveRanges(to_be_live, next_block_boundary); - } + } else { + // Merge at the end of, e.g., a switch. 
+ ComputeStateFromManyPredecessors(current_block, &to_be_live); + } + + if (!no_change_required) { + SpillNotLiveRanges(&to_be_live, next_block_boundary, spill_mode); + ReloadLiveRanges(to_be_live, next_block_boundary); } - // Update block information - last_block = current_block->rpo_number(); - next_block_boundary = LifetimePosition::InstructionFromInstructionIndex( - current_block->last_instruction_index()) - .NextFullStart(); - - // We might have created new unhandled live ranges, so cycle around the - // loop to make sure we pick the top most range in unhandled for - // processing. - continue; } + // Update block information + last_block = current_block->rpo_number(); + next_block_boundary = LifetimePosition::InstructionFromInstructionIndex( + current_block->last_instruction_index()) + .NextFullStart(); + + // We might have created new unhandled live ranges, so cycle around the + // loop to make sure we pick the top most range in unhandled for + // processing. + continue; } DCHECK_NOT_NULL(current); @@ -3952,28 +3730,6 @@ void LinearScanAllocator::AllocateRegisters() { } } -bool LinearScanAllocator::TrySplitAndSpillSplinter(LiveRange* range) { - DCHECK(!data()->is_turbo_control_flow_aware_allocation()); - DCHECK(range->TopLevel()->IsSplinter()); - // If we can spill the whole range, great. Otherwise, split above the - // first use needing a register and spill the top part. - const UsePosition* next_reg = range->NextRegisterPosition(range->Start()); - if (next_reg == nullptr) { - Spill(range, SpillMode::kSpillAtDefinition); - return true; - } else if (range->FirstHintPosition() == nullptr) { - // If there was no hint, but we have a use position requiring a - // register, apply the hot path heuristics. - return false; - } else if (next_reg->pos().PrevStart() > range->Start()) { - LiveRange* tail = SplitRangeAt(range, next_reg->pos().PrevStart()); - AddToUnhandled(tail); - Spill(range, SpillMode::kSpillAtDefinition); - return true; - } - return false; -} - void LinearScanAllocator::SetLiveRangeAssignedRegister(LiveRange* range, int reg) { data()->MarkAllocated(range->representation(), reg); @@ -4198,49 +3954,17 @@ void LinearScanAllocator::FindFreeRegistersForRange( // High-level register allocation summary: // -// For regular, or hot (i.e. not splinter) ranges, we attempt to first -// allocate first the preferred (hint) register. If that is not possible, -// we find a register that's free, and allocate that. If that's not possible, -// we search for a register to steal from a range that was allocated. The -// goal is to optimize for throughput by avoiding register-to-memory -// moves, which are expensive. -// -// For splinters, the goal is to minimize the number of moves. First we try -// to allocate the preferred register (more discussion follows). Failing that, -// we bail out and spill as far as we can, unless the first use is at start, -// case in which we apply the same behavior as we do for regular ranges. -// If there is no hint, we apply the hot-path behavior. -// -// For the splinter, the hint register may come from: -// -// - the hot path (we set it at splintering time with SetHint). In this case, if -// we cannot offer the hint register, spilling is better because it's at most -// 1 move, while trying to find and offer another register is at least 1 move. -// -// - a constraint. If we cannot offer that register, it's because there is some -// interference. 
So offering the hint register up to the interference would
-// result
-// in a move at the interference, plus a move to satisfy the constraint. This is
-// also the number of moves if we spill, with the potential of the range being
-// already spilled and thus saving a move (the spill).
-// Note that this can only be an input constraint, if it were an output one,
-// the range wouldn't be a splinter because it means it'd be defined in a
-// deferred
-// block, and we don't mark those as splinters (they live in deferred blocks
-// only).
-//
-// - a phi. The same analysis as in the case of the input constraint applies.
-//
+// We attempt to first allocate the preferred (hint) register. If that is not
+// possible, we find a register that's free, and allocate that. If that's not
+// possible, we search for a register to steal from a range that was allocated.
+// The goal is to optimize for throughput by avoiding register-to-memory moves,
+// which are expensive.
 void LinearScanAllocator::ProcessCurrentRange(LiveRange* current,
                                               SpillMode spill_mode) {
   EmbeddedVector<LifetimePosition, RegisterConfiguration::kMaxRegisters>
       free_until_pos;
   FindFreeRegistersForRange(current, free_until_pos);
   if (!TryAllocatePreferredReg(current, free_until_pos)) {
-    if (current->TopLevel()->IsSplinter()) {
-      DCHECK(!data()->is_turbo_control_flow_aware_allocation());
-      if (TrySplitAndSpillSplinter(current)) return;
-    }
     if (!TryAllocateFreeReg(current, free_until_pos)) {
       AllocateBlockedReg(current, spill_mode);
     }
@@ -4725,30 +4449,26 @@ OperandAssigner::OperandAssigner(TopTierRegisterAllocationData* data)
     : data_(data) {}

 void OperandAssigner::DecideSpillingMode() {
-  if (data()->is_turbo_control_flow_aware_allocation()) {
-    for (auto range : data()->live_ranges()) {
-      data()->tick_counter()->TickAndMaybeEnterSafepoint();
-      int max_blocks = data()->code()->InstructionBlockCount();
-      if (range != nullptr && range->IsSpilledOnlyInDeferredBlocks(data())) {
-        // If the range is spilled only in deferred blocks and starts in
-        // a non-deferred block, we transition its representation here so
-        // that the LiveRangeConnector processes them correctly. If,
-        // however, they start in a deferred block, we upgrade them to
-        // spill at definition, as that definition is in a deferred block
-        // anyway. While this is an optimization, the code in LiveRangeConnector
-        // relies on it!
-        if (GetInstructionBlock(data()->code(), range->Start())->IsDeferred()) {
-          TRACE("Live range %d is spilled and alive in deferred code only\n",
-                range->vreg());
-          range->TransitionRangeToSpillAtDefinition();
-        } else {
-          TRACE(
-              "Live range %d is spilled deferred code only but alive outside\n",
+  for (auto range : data()->live_ranges()) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    int max_blocks = data()->code()->InstructionBlockCount();
+    if (range != nullptr && range->IsSpilledOnlyInDeferredBlocks(data())) {
+      // If the range is spilled only in deferred blocks and starts in
+      // a non-deferred block, we transition its representation here so
+      // that the LiveRangeConnector processes them correctly. If,
+      // however, they start in a deferred block, we upgrade them to
+      // spill at definition, as that definition is in a deferred block
+      // anyway. While this is an optimization, the code in LiveRangeConnector
+      // relies on it!
+ if (GetInstructionBlock(data()->code(), range->Start())->IsDeferred()) { + TRACE("Live range %d is spilled and alive in deferred code only\n", range->vreg()); - DCHECK(data()->is_turbo_control_flow_aware_allocation()); - range->TransitionRangeToDeferredSpill(data()->allocation_zone(), - max_blocks); - } + range->TransitionRangeToSpillAtDefinition(); + } else { + TRACE("Live range %d is spilled deferred code only but alive outside\n", + range->vreg()); + range->TransitionRangeToDeferredSpill(data()->allocation_zone(), + max_blocks); } } } diff --git a/deps/v8/src/compiler/backend/register-allocator.h b/deps/v8/src/compiler/backend/register-allocator.h index a9dc2900f1..87c0afbcfc 100644 --- a/deps/v8/src/compiler/backend/register-allocator.h +++ b/deps/v8/src/compiler/backend/register-allocator.h @@ -175,11 +175,7 @@ class LifetimePosition final { std::ostream& operator<<(std::ostream& os, const LifetimePosition pos); -enum class RegisterAllocationFlag : unsigned { - kTurboControlFlowAwareAllocation = 1 << 0, - kTurboPreprocessRanges = 1 << 1, - kTraceAllocation = 1 << 2 -}; +enum class RegisterAllocationFlag : unsigned { kTraceAllocation = 1 << 0 }; using RegisterAllocationFlags = base::Flags<RegisterAllocationFlag>; @@ -210,14 +206,6 @@ class TopTierRegisterAllocationData final : public RegisterAllocationData { // regular code (kSpillAtDefinition). enum SpillMode { kSpillAtDefinition, kSpillDeferred }; - bool is_turbo_control_flow_aware_allocation() const { - return flags_ & RegisterAllocationFlag::kTurboControlFlowAwareAllocation; - } - - bool is_turbo_preprocess_ranges() const { - return flags_ & RegisterAllocationFlag::kTurboPreprocessRanges; - } - bool is_trace_alloc() { return flags_ & RegisterAllocationFlag::kTraceAllocation; } @@ -615,7 +603,7 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { // Can this live range be spilled at this position. bool CanBeSpilled(LifetimePosition pos) const; - // Splitting primitive used by both splitting and splintering members. + // Splitting primitive used by splitting members. // Performs the split, but does not link the resulting ranges. // The given position must follow the start of the range. // All uses following the given position will be moved from this @@ -708,7 +696,7 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { using ControlFlowRegisterHint = base::BitField<uint8_t, 22, 6>; // Bits 28-31 are used by TopLevelLiveRange. - // Unique among children and splinters of the same virtual register. + // Unique among children of the same virtual register. int relative_id_; uint32_t bits_; UseInterval* last_interval_; @@ -720,8 +708,6 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { mutable UseInterval* current_interval_; // This is used as a cache, it doesn't affect correctness. mutable UsePosition* last_processed_use_; - // Cache the last position splintering stopped at. - mutable UsePosition* splitting_pointer_; // This is used as a cache in BuildLiveRanges and during register allocation. UsePosition* current_hint_position_; LiveRangeBundle* bundle_ = nullptr; @@ -853,16 +839,6 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { // Shorten the most recently added interval by setting a new start. void ShortenTo(LifetimePosition start, bool trace_alloc); - // Detaches between start and end, and attributes the resulting range to - // result. - // The current range is pointed to as "splintered_from". 
No parent/child - // relationship is established between this and result. - void Splinter(LifetimePosition start, LifetimePosition end, Zone* zone); - - // Assuming other was splintered from this range, embeds other and its - // children as part of the children sequence of this range. - void Merge(TopLevelLiveRange* other, Zone* zone); - // Spill range management. void SetSpillRange(SpillRange* spill_range); @@ -963,19 +939,12 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { } } - TopLevelLiveRange* splintered_from() const { return splintered_from_; } - bool IsSplinter() const { return splintered_from_ != nullptr; } bool MayRequireSpillRange() const { - DCHECK(!IsSplinter()); return !HasSpillOperand() && spill_range_ == nullptr; } void UpdateSpillRangePostMerge(TopLevelLiveRange* merged); int vreg() const { return vreg_; } -#if DEBUG - int debug_virt_reg() const; -#endif - void Verify() const; void VerifyChildrenInOrder() const; @@ -985,19 +954,13 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { // if you call it with a non-decreasing sequence of positions. LiveRange* GetChildCovers(LifetimePosition pos); - int GetNextChildId() { - return IsSplinter() ? splintered_from()->GetNextChildId() - : ++last_child_id_; - } + int GetNextChildId() { return ++last_child_id_; } int GetMaxChildCount() const { return last_child_id_ + 1; } bool IsSpilledOnlyInDeferredBlocks( const TopTierRegisterAllocationData* data) const { - if (data->is_turbo_control_flow_aware_allocation()) { - return spill_type() == SpillType::kDeferredSpillRange; - } - return spilled_in_deferred_blocks_; + return spill_type() == SpillType::kDeferredSpillRange; } struct SpillMoveInsertionList; @@ -1007,17 +970,6 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { DCHECK(!IsSpilledOnlyInDeferredBlocks(data)); return spill_move_insertion_locations_; } - TopLevelLiveRange* splinter() const { return splinter_; } - void SetSplinter(TopLevelLiveRange* splinter) { - DCHECK_NULL(splinter_); - DCHECK_NOT_NULL(splinter); - - splinter_ = splinter; - splinter->relative_id_ = GetNextChildId(); - splinter->set_spill_type(spill_type()); - splinter->SetSplinteredFrom(this); - if (bundle_ != nullptr) splinter->set_bundle(bundle_); - } void MarkHasPreassignedSlot() { has_preassigned_slot_ = true; } bool has_preassigned_slot() const { return has_preassigned_slot_; } @@ -1056,7 +1008,6 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { private: friend class LiveRange; - void SetSplinteredFrom(TopLevelLiveRange* splinter_parent); // If spill type is kSpillRange, then this value indicates whether we've // chosen to spill at the definition or at some later points. @@ -1076,7 +1027,6 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { int vreg_; int last_child_id_; - TopLevelLiveRange* splintered_from_; union { // Correct value determined by spill_type() InstructionOperand* spill_operand_; @@ -1096,7 +1046,6 @@ class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange { int spill_start_index_; UsePosition* last_pos_; LiveRange* last_child_covers_; - TopLevelLiveRange* splinter_; DISALLOW_COPY_AND_ASSIGN(TopLevelLiveRange); }; @@ -1310,11 +1259,8 @@ class LiveRangeBuilder final : public ZoneObject { spill_mode); } SpillMode SpillModeForBlock(const InstructionBlock* block) const { - if (data()->is_turbo_control_flow_aware_allocation()) { - return block->IsDeferred() ? 
SpillMode::kSpillDeferred - : SpillMode::kSpillAtDefinition; - } - return SpillMode::kSpillAtDefinition; + return block->IsDeferred() ? SpillMode::kSpillDeferred + : SpillMode::kSpillAtDefinition; } TopTierRegisterAllocationData* const data_; ZoneMap<InstructionOperand*, UsePosition*> phi_hints_; @@ -1529,7 +1475,6 @@ class LinearScanAllocator final : public RegisterAllocator { Vector<LifetimePosition> free_until_pos); void ProcessCurrentRange(LiveRange* current, SpillMode spill_mode); void AllocateBlockedReg(LiveRange* range, SpillMode spill_mode); - bool TrySplitAndSpillSplinter(LiveRange* range); // Spill the given life range after position pos. void SpillAfter(LiveRange* range, LifetimePosition pos, SpillMode spill_mode); diff --git a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc index 12b1167d4d..f3ab25630f 100644 --- a/deps/v8/src/compiler/backend/s390/code-generator-s390.cc +++ b/deps/v8/src/compiler/backend/s390/code-generator-s390.cc @@ -643,184 +643,184 @@ static inline int AssembleUnaryOp(Instruction* instr, _R _r, _M _m, _I _i) { __ MovFromFloatResult(i.OutputDoubleRegister()); \ } while (0) -#define ASSEMBLE_DOUBLE_MAX() \ - do { \ - DoubleRegister left_reg = i.InputDoubleRegister(0); \ - DoubleRegister right_reg = i.InputDoubleRegister(1); \ - DoubleRegister result_reg = i.OutputDoubleRegister(); \ - Label check_nan_left, check_zero, return_left, return_right, done; \ - __ cdbr(left_reg, right_reg); \ - __ bunordered(&check_nan_left, Label::kNear); \ - __ beq(&check_zero); \ - __ bge(&return_left, Label::kNear); \ - __ b(&return_right, Label::kNear); \ - \ - __ bind(&check_zero); \ - __ lzdr(kDoubleRegZero); \ - __ cdbr(left_reg, kDoubleRegZero); \ - /* left == right != 0. */ \ - __ bne(&return_left, Label::kNear); \ - /* At this point, both left and right are either 0 or -0. */ \ - /* N.B. The following works because +0 + -0 == +0 */ \ - /* For max we want logical-and of sign bit: (L + R) */ \ - __ ldr(result_reg, left_reg); \ - __ adbr(result_reg, right_reg); \ - __ b(&done, Label::kNear); \ - \ - __ bind(&check_nan_left); \ - __ cdbr(left_reg, left_reg); \ - /* left == NaN. */ \ - __ bunordered(&return_left, Label::kNear); \ - \ - __ bind(&return_right); \ - if (right_reg != result_reg) { \ - __ ldr(result_reg, right_reg); \ - } \ - __ b(&done, Label::kNear); \ - \ - __ bind(&return_left); \ - if (left_reg != result_reg) { \ - __ ldr(result_reg, left_reg); \ - } \ - __ bind(&done); \ +#define ASSEMBLE_DOUBLE_MAX() \ + do { \ + DoubleRegister left_reg = i.InputDoubleRegister(0); \ + DoubleRegister right_reg = i.InputDoubleRegister(1); \ + DoubleRegister result_reg = i.OutputDoubleRegister(); \ + Label check_zero, return_left, return_right, return_nan, done; \ + __ cdbr(left_reg, right_reg); \ + __ bunordered(&return_nan, Label::kNear); \ + __ beq(&check_zero); \ + __ bge(&return_left, Label::kNear); \ + __ b(&return_right, Label::kNear); \ + \ + __ bind(&check_zero); \ + __ lzdr(kDoubleRegZero); \ + __ cdbr(left_reg, kDoubleRegZero); \ + /* left == right != 0. */ \ + __ bne(&return_left, Label::kNear); \ + /* At this point, both left and right are either 0 or -0. */ \ + /* N.B. 
The following works because +0 + -0 == +0 */ \ + /* For max we want logical-and of sign bit: (L + R) */ \ + __ ldr(result_reg, left_reg); \ + __ adbr(result_reg, right_reg); \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_nan); \ + /* If left or right are NaN, adbr propagates the appropriate one.*/ \ + __ adbr(left_reg, right_reg); \ + __ b(&return_left, Label::kNear); \ + \ + __ bind(&return_right); \ + if (right_reg != result_reg) { \ + __ ldr(result_reg, right_reg); \ + } \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_left); \ + if (left_reg != result_reg) { \ + __ ldr(result_reg, left_reg); \ + } \ + __ bind(&done); \ } while (0) -#define ASSEMBLE_DOUBLE_MIN() \ - do { \ - DoubleRegister left_reg = i.InputDoubleRegister(0); \ - DoubleRegister right_reg = i.InputDoubleRegister(1); \ - DoubleRegister result_reg = i.OutputDoubleRegister(); \ - Label check_nan_left, check_zero, return_left, return_right, done; \ - __ cdbr(left_reg, right_reg); \ - __ bunordered(&check_nan_left, Label::kNear); \ - __ beq(&check_zero); \ - __ ble(&return_left, Label::kNear); \ - __ b(&return_right, Label::kNear); \ - \ - __ bind(&check_zero); \ - __ lzdr(kDoubleRegZero); \ - __ cdbr(left_reg, kDoubleRegZero); \ - /* left == right != 0. */ \ - __ bne(&return_left, Label::kNear); \ - /* At this point, both left and right are either 0 or -0. */ \ - /* N.B. The following works because +0 + -0 == +0 */ \ - /* For min we want logical-or of sign bit: -(-L + -R) */ \ - __ lcdbr(left_reg, left_reg); \ - __ ldr(result_reg, left_reg); \ - if (left_reg == right_reg) { \ - __ adbr(result_reg, right_reg); \ - } else { \ - __ sdbr(result_reg, right_reg); \ - } \ - __ lcdbr(result_reg, result_reg); \ - __ b(&done, Label::kNear); \ - \ - __ bind(&check_nan_left); \ - __ cdbr(left_reg, left_reg); \ - /* left == NaN. */ \ - __ bunordered(&return_left, Label::kNear); \ - \ - __ bind(&return_right); \ - if (right_reg != result_reg) { \ - __ ldr(result_reg, right_reg); \ - } \ - __ b(&done, Label::kNear); \ - \ - __ bind(&return_left); \ - if (left_reg != result_reg) { \ - __ ldr(result_reg, left_reg); \ - } \ - __ bind(&done); \ +#define ASSEMBLE_DOUBLE_MIN() \ + do { \ + DoubleRegister left_reg = i.InputDoubleRegister(0); \ + DoubleRegister right_reg = i.InputDoubleRegister(1); \ + DoubleRegister result_reg = i.OutputDoubleRegister(); \ + Label check_zero, return_left, return_right, return_nan, done; \ + __ cdbr(left_reg, right_reg); \ + __ bunordered(&return_nan, Label::kNear); \ + __ beq(&check_zero); \ + __ ble(&return_left, Label::kNear); \ + __ b(&return_right, Label::kNear); \ + \ + __ bind(&check_zero); \ + __ lzdr(kDoubleRegZero); \ + __ cdbr(left_reg, kDoubleRegZero); \ + /* left == right != 0. */ \ + __ bne(&return_left, Label::kNear); \ + /* At this point, both left and right are either 0 or -0. */ \ + /* N.B. 
The following works because +0 + -0 == +0 */ \ + /* For min we want logical-or of sign bit: -(-L + -R) */ \ + __ lcdbr(left_reg, left_reg); \ + __ ldr(result_reg, left_reg); \ + if (left_reg == right_reg) { \ + __ adbr(result_reg, right_reg); \ + } else { \ + __ sdbr(result_reg, right_reg); \ + } \ + __ lcdbr(result_reg, result_reg); \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_nan); \ + /* If left or right are NaN, adbr propagates the appropriate one.*/ \ + __ adbr(left_reg, right_reg); \ + __ b(&return_left, Label::kNear); \ + \ + __ bind(&return_right); \ + if (right_reg != result_reg) { \ + __ ldr(result_reg, right_reg); \ + } \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_left); \ + if (left_reg != result_reg) { \ + __ ldr(result_reg, left_reg); \ + } \ + __ bind(&done); \ } while (0) -#define ASSEMBLE_FLOAT_MAX() \ - do { \ - DoubleRegister left_reg = i.InputDoubleRegister(0); \ - DoubleRegister right_reg = i.InputDoubleRegister(1); \ - DoubleRegister result_reg = i.OutputDoubleRegister(); \ - Label check_nan_left, check_zero, return_left, return_right, done; \ - __ cebr(left_reg, right_reg); \ - __ bunordered(&check_nan_left, Label::kNear); \ - __ beq(&check_zero); \ - __ bge(&return_left, Label::kNear); \ - __ b(&return_right, Label::kNear); \ - \ - __ bind(&check_zero); \ - __ lzdr(kDoubleRegZero); \ - __ cebr(left_reg, kDoubleRegZero); \ - /* left == right != 0. */ \ - __ bne(&return_left, Label::kNear); \ - /* At this point, both left and right are either 0 or -0. */ \ - /* N.B. The following works because +0 + -0 == +0 */ \ - /* For max we want logical-and of sign bit: (L + R) */ \ - __ ldr(result_reg, left_reg); \ - __ aebr(result_reg, right_reg); \ - __ b(&done, Label::kNear); \ - \ - __ bind(&check_nan_left); \ - __ cebr(left_reg, left_reg); \ - /* left == NaN. */ \ - __ bunordered(&return_left, Label::kNear); \ - \ - __ bind(&return_right); \ - if (right_reg != result_reg) { \ - __ ldr(result_reg, right_reg); \ - } \ - __ b(&done, Label::kNear); \ - \ - __ bind(&return_left); \ - if (left_reg != result_reg) { \ - __ ldr(result_reg, left_reg); \ - } \ - __ bind(&done); \ +#define ASSEMBLE_FLOAT_MAX() \ + do { \ + DoubleRegister left_reg = i.InputDoubleRegister(0); \ + DoubleRegister right_reg = i.InputDoubleRegister(1); \ + DoubleRegister result_reg = i.OutputDoubleRegister(); \ + Label check_zero, return_left, return_right, return_nan, done; \ + __ cebr(left_reg, right_reg); \ + __ bunordered(&return_nan, Label::kNear); \ + __ beq(&check_zero); \ + __ bge(&return_left, Label::kNear); \ + __ b(&return_right, Label::kNear); \ + \ + __ bind(&check_zero); \ + __ lzdr(kDoubleRegZero); \ + __ cebr(left_reg, kDoubleRegZero); \ + /* left == right != 0. */ \ + __ bne(&return_left, Label::kNear); \ + /* At this point, both left and right are either 0 or -0. */ \ + /* N.B. 
The following works because +0 + -0 == +0 */ \ + /* For max we want logical-and of sign bit: (L + R) */ \ + __ ldr(result_reg, left_reg); \ + __ aebr(result_reg, right_reg); \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_nan); \ + /* If left or right are NaN, aebr propagates the appropriate one.*/ \ + __ aebr(left_reg, right_reg); \ + __ b(&return_left, Label::kNear); \ + \ + __ bind(&return_right); \ + if (right_reg != result_reg) { \ + __ ldr(result_reg, right_reg); \ + } \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_left); \ + if (left_reg != result_reg) { \ + __ ldr(result_reg, left_reg); \ + } \ + __ bind(&done); \ } while (0) -#define ASSEMBLE_FLOAT_MIN() \ - do { \ - DoubleRegister left_reg = i.InputDoubleRegister(0); \ - DoubleRegister right_reg = i.InputDoubleRegister(1); \ - DoubleRegister result_reg = i.OutputDoubleRegister(); \ - Label check_nan_left, check_zero, return_left, return_right, done; \ - __ cebr(left_reg, right_reg); \ - __ bunordered(&check_nan_left, Label::kNear); \ - __ beq(&check_zero); \ - __ ble(&return_left, Label::kNear); \ - __ b(&return_right, Label::kNear); \ - \ - __ bind(&check_zero); \ - __ lzdr(kDoubleRegZero); \ - __ cebr(left_reg, kDoubleRegZero); \ - /* left == right != 0. */ \ - __ bne(&return_left, Label::kNear); \ - /* At this point, both left and right are either 0 or -0. */ \ - /* N.B. The following works because +0 + -0 == +0 */ \ - /* For min we want logical-or of sign bit: -(-L + -R) */ \ - __ lcebr(left_reg, left_reg); \ - __ ldr(result_reg, left_reg); \ - if (left_reg == right_reg) { \ - __ aebr(result_reg, right_reg); \ - } else { \ - __ sebr(result_reg, right_reg); \ - } \ - __ lcebr(result_reg, result_reg); \ - __ b(&done, Label::kNear); \ - \ - __ bind(&check_nan_left); \ - __ cebr(left_reg, left_reg); \ - /* left == NaN. */ \ - __ bunordered(&return_left, Label::kNear); \ - \ - __ bind(&return_right); \ - if (right_reg != result_reg) { \ - __ ldr(result_reg, right_reg); \ - } \ - __ b(&done, Label::kNear); \ - \ - __ bind(&return_left); \ - if (left_reg != result_reg) { \ - __ ldr(result_reg, left_reg); \ - } \ - __ bind(&done); \ +#define ASSEMBLE_FLOAT_MIN() \ + do { \ + DoubleRegister left_reg = i.InputDoubleRegister(0); \ + DoubleRegister right_reg = i.InputDoubleRegister(1); \ + DoubleRegister result_reg = i.OutputDoubleRegister(); \ + Label check_zero, return_left, return_right, return_nan, done; \ + __ cebr(left_reg, right_reg); \ + __ bunordered(&return_nan, Label::kNear); \ + __ beq(&check_zero); \ + __ ble(&return_left, Label::kNear); \ + __ b(&return_right, Label::kNear); \ + \ + __ bind(&check_zero); \ + __ lzdr(kDoubleRegZero); \ + __ cebr(left_reg, kDoubleRegZero); \ + /* left == right != 0. */ \ + __ bne(&return_left, Label::kNear); \ + /* At this point, both left and right are either 0 or -0. */ \ + /* N.B. 
The following works because +0 + -0 == +0 */ \ + /* For min we want logical-or of sign bit: -(-L + -R) */ \ + __ lcebr(left_reg, left_reg); \ + __ ldr(result_reg, left_reg); \ + if (left_reg == right_reg) { \ + __ aebr(result_reg, right_reg); \ + } else { \ + __ sebr(result_reg, right_reg); \ + } \ + __ lcebr(result_reg, result_reg); \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_nan); \ + /* If left or right are NaN, aebr propagates the appropriate one.*/ \ + __ aebr(left_reg, right_reg); \ + __ b(&return_left, Label::kNear); \ + \ + __ bind(&return_right); \ + if (right_reg != result_reg) { \ + __ ldr(result_reg, right_reg); \ + } \ + __ b(&done, Label::kNear); \ + \ + __ bind(&return_left); \ + if (left_reg != result_reg) { \ + __ ldr(result_reg, left_reg); \ + } \ + __ bind(&done); \ } while (0) // // Only MRI mode for these instructions available @@ -2581,7 +2581,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kS390_LoadReverseSimd128: { AddressingMode mode = kMode_None; MemOperand operand = i.MemoryOperand(&mode); - if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) { + if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) && + is_uint12(operand.offset())) { __ vlbr(i.OutputSimd128Register(), operand, Condition(4)); } else { __ lrvg(r0, operand); @@ -2643,7 +2644,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( size_t index = 0; AddressingMode mode = kMode_None; MemOperand operand = i.MemoryOperand(&mode, &index); - if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) { + if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) && + is_uint12(operand.offset())) { __ vstbr(i.InputSimd128Register(index), operand, Condition(4)); } else { __ vlgv(r0, i.InputSimd128Register(index), MemOperand(r0, 1), @@ -3280,8 +3282,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( Register scratch_0 = r0; Register scratch_1 = r1; for (int i = 0; i < 2; i++) { - __ vlgv(scratch_0, src0, MemOperand(r0, 0), Condition(3)); - __ vlgv(scratch_1, src1, MemOperand(r0, 0), Condition(3)); + __ vlgv(scratch_0, src0, MemOperand(r0, i), Condition(3)); + __ vlgv(scratch_1, src1, MemOperand(r0, i), Condition(3)); __ Mul64(scratch_0, scratch_1); scratch_0 = r1; scratch_1 = ip; @@ -4176,7 +4178,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } #undef BINOP_EXTRACT - case kS390_S8x16Shuffle: { + case kS390_I8x16Shuffle: { Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), src1 = i.InputSimd128Register(1); @@ -4196,7 +4198,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vperm(dst, src0, src1, kScratchDoubleReg, Condition(0), Condition(0)); break; } - case kS390_S8x16Swizzle: { + case kS390_I8x16Swizzle: { Simd128Register dst = i.OutputSimd128Register(), src0 = i.InputSimd128Register(0), src1 = i.InputSimd128Register(1); @@ -4579,9 +4581,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(ip); - if (call_descriptor->PushArgumentCount()) { - __ Push(kJavaScriptCallArgCountRegister); - } } else { StackFrame::Type type = info()->GetOutputStackFrameType(); // TODO(mbrandy): Detect cases where ip is the entrypoint (for diff --git a/deps/v8/src/compiler/backend/s390/instruction-codes-s390.h b/deps/v8/src/compiler/backend/s390/instruction-codes-s390.h index 693b555ae7..ab7973c089 100644 --- a/deps/v8/src/compiler/backend/s390/instruction-codes-s390.h +++ 
b/deps/v8/src/compiler/backend/s390/instruction-codes-s390.h @@ -365,8 +365,8 @@ namespace compiler { V(S390_I8x16RoundingAverageU) \ V(S390_I8x16Abs) \ V(S390_I8x16BitMask) \ - V(S390_S8x16Shuffle) \ - V(S390_S8x16Swizzle) \ + V(S390_I8x16Shuffle) \ + V(S390_I8x16Swizzle) \ V(S390_V64x2AnyTrue) \ V(S390_V32x4AnyTrue) \ V(S390_V16x8AnyTrue) \ diff --git a/deps/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc b/deps/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc index 1117ec5141..c0a854b7f1 100644 --- a/deps/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc +++ b/deps/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc @@ -311,8 +311,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kS390_I8x16RoundingAverageU: case kS390_I8x16Abs: case kS390_I8x16BitMask: - case kS390_S8x16Shuffle: - case kS390_S8x16Swizzle: + case kS390_I8x16Shuffle: + case kS390_I8x16Swizzle: case kS390_V64x2AnyTrue: case kS390_V32x4AnyTrue: case kS390_V16x8AnyTrue: diff --git a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc index bb74050dcf..ee3e996169 100644 --- a/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc +++ b/deps/v8/src/compiler/backend/s390/instruction-selector-s390.cc @@ -2201,6 +2201,10 @@ void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) { VisitFloat64Compare(this, node, &cont); } +bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) { + UNIMPLEMENTED(); +} + void InstructionSelector::EmitPrepareArguments( ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor, Node* node) { @@ -2820,7 +2824,7 @@ SIMD_VISIT_PMIN_MAX(F32x4Pmax) #undef SIMD_VISIT_PMIN_MAX #undef SIMD_TYPES -void InstructionSelector::VisitS8x16Shuffle(Node* node) { +void InstructionSelector::VisitI8x16Shuffle(Node* node) { uint8_t shuffle[kSimd128Size]; uint8_t* shuffle_p = &shuffle[0]; bool is_swizzle; @@ -2841,7 +2845,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { } shuffle_p = &shuffle_remapped[0]; #endif - Emit(kS390_S8x16Shuffle, g.DefineAsRegister(node), + Emit(kS390_I8x16Shuffle, g.DefineAsRegister(node), g.UseUniqueRegister(input0), g.UseUniqueRegister(input1), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p)), g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p + 4)), @@ -2849,9 +2853,9 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p + 12))); } -void InstructionSelector::VisitS8x16Swizzle(Node* node) { +void InstructionSelector::VisitI8x16Swizzle(Node* node) { S390OperandGenerator g(this); - Emit(kS390_S8x16Swizzle, g.DefineAsRegister(node), + Emit(kS390_I8x16Swizzle, g.DefineAsRegister(node), g.UseUniqueRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1))); } diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc index b84df81925..e0cf602b11 100644 --- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -194,94 +194,6 @@ class OutOfLineLoadFloat64NaN final : public OutOfLineCode { XMMRegister const result_; }; -class OutOfLineF32x4Min final : public OutOfLineCode { - public: - OutOfLineF32x4Min(CodeGenerator* gen, XMMRegister result, XMMRegister error) - : OutOfLineCode(gen), result_(result), error_(error) {} - - void Generate() final { - // |result| is the partial result, |kScratchDoubleReg| 
is the error. - // propagate -0's and NaNs (possibly non-canonical) from the error. - __ Orps(error_, result_); - // Canonicalize NaNs by quieting and clearing the payload. - __ Cmpps(result_, error_, int8_t{3}); - __ Orps(error_, result_); - __ Psrld(result_, byte{10}); - __ Andnps(result_, error_); - } - - private: - XMMRegister const result_; - XMMRegister const error_; -}; - -class OutOfLineF64x2Min final : public OutOfLineCode { - public: - OutOfLineF64x2Min(CodeGenerator* gen, XMMRegister result, XMMRegister error) - : OutOfLineCode(gen), result_(result), error_(error) {} - - void Generate() final { - // |result| is the partial result, |kScratchDoubleReg| is the error. - // propagate -0's and NaNs (possibly non-canonical) from the error. - __ Orpd(error_, result_); - // Canonicalize NaNs by quieting and clearing the payload. - __ Cmppd(result_, error_, int8_t{3}); - __ Orpd(error_, result_); - __ Psrlq(result_, 13); - __ Andnpd(result_, error_); - } - - private: - XMMRegister const result_; - XMMRegister const error_; -}; - -class OutOfLineF32x4Max final : public OutOfLineCode { - public: - OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) - : OutOfLineCode(gen), result_(result), error_(error) {} - - void Generate() final { - // |result| is the partial result, |kScratchDoubleReg| is the error. - // Propagate NaNs (possibly non-canonical). - __ Orps(result_, error_); - // Propagate sign errors and (subtle) quiet NaNs. - __ Subps(result_, error_); - // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. - __ Cmpps(error_, result_, int8_t{3}); - __ Psrld(error_, byte{10}); - __ Andnps(error_, result_); - __ Movaps(result_, error_); - } - - private: - XMMRegister const result_; - XMMRegister const error_; -}; - -class OutOfLineF64x2Max final : public OutOfLineCode { - public: - OutOfLineF64x2Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) - : OutOfLineCode(gen), result_(result), error_(error) {} - - void Generate() final { - // |result| is the partial result, |kScratchDoubleReg| is the error. - // Propagate NaNs (possibly non-canonical). - __ Orpd(result_, error_); - // Propagate sign errors and (subtle) quiet NaNs. - __ Subpd(result_, error_); - // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. - __ Cmppd(error_, result_, int8_t{3}); - __ Psrlq(error_, byte{13}); - __ Andnpd(error_, result_); - __ Movapd(result_, error_); - } - - private: - XMMRegister const result_; - XMMRegister const error_; -}; - class OutOfLineTruncateDoubleToI final : public OutOfLineCode { public: OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result, @@ -745,13 +657,33 @@ void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, namespace { -void AdjustStackPointerForTailCall(TurboAssembler* assembler, +void AdjustStackPointerForTailCall(Instruction* instr, + TurboAssembler* assembler, Linkage* linkage, + OptimizedCompilationInfo* info, FrameAccessState* state, int new_slot_above_sp, bool allow_shrinkage = true) { - int current_sp_offset = state->GetSPToFPSlotCount() + - StandardFrameConstants::kFixedSlotCountAboveFp; - int stack_slot_delta = new_slot_above_sp - current_sp_offset; + int stack_slot_delta; + if (HasCallDescriptorFlag(instr, CallDescriptor::kIsTailCallForTierUp)) { + // For this special tail-call mode, the callee has the same arguments and + // linkage as the caller, and arguments adapter frames must be preserved. 
+    // Thus we simply have to reset the stack pointer register to its original
+    // value before frame construction.
+    // See also: AssembleConstructFrame.
+    DCHECK(!info->is_osr());
+    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedRegisters(), 0);
+    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters(), 0);
+    DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
+    stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
+                        kReturnAddressStackSlotCount) *
+                       -1;
+    DCHECK_LE(stack_slot_delta, 0);
+  } else {
+    int current_sp_offset = state->GetSPToFPSlotCount() +
+                            StandardFrameConstants::kFixedSlotCountAboveFp;
+    stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  }
+
   if (stack_slot_delta > 0) {
     assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
     state->IncreaseSPDelta(stack_slot_delta);
@@ -778,12 +710,14 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
   if (!pushes.empty() &&
       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
        first_unused_stack_slot)) {
+    DCHECK(!HasCallDescriptorFlag(instr, CallDescriptor::kIsTailCallForTierUp));
     X64OperandConverter g(this, instr);
     for (auto move : pushes) {
       LocationOperand destination_location(
           LocationOperand::cast(move->destination()));
       InstructionOperand source(move->source());
-      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+      AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
+                                    frame_access_state(),
                                     destination_location.index());
       if (source.IsStackSlot()) {
         LocationOperand source_location(LocationOperand::cast(source));
@@ -801,14 +735,15 @@ void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
       move->Eliminate();
     }
   }
-  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
-                                first_unused_stack_slot, false);
+  AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
+                                frame_access_state(), first_unused_stack_slot,
+                                false);
 }

 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                              int first_unused_stack_slot) {
-  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
-                                first_unused_stack_slot);
+  AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
+                                frame_access_state(), first_unused_stack_slot);
 }

 // Check that {kJavaScriptCallCodeStartRegister} is correct.
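The tier-up branch above releases the whole frame except the return address, which is why the computed delta can never be positive. A minimal standalone sketch of that arithmetic, not part of the patch (the 8-slot frame in main() is a made-up example):

#include <cassert>

// Mirrors the kIsTailCallForTierUp branch of AdjustStackPointerForTailCall:
// pop every frame slot except the return address, so the stack pointer
// returns to its value from before frame construction.
int TierUpStackSlotDelta(int total_frame_slot_count,
                         int return_address_slot_count) {
  int stack_slot_delta =
      (total_frame_slot_count - return_address_slot_count) * -1;
  assert(stack_slot_delta <= 0);  // mirrors DCHECK_LE(stack_slot_delta, 0)
  return stack_slot_delta;
}

int main() {
  // A hypothetical 8-slot frame: return address plus rbp, context, function,
  // and four spill slots. Seven slots must be released before the jump.
  assert(TierUpStackSlotDelta(8, 1) == -7);
  return 0;
}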
@@ -912,12 +847,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArchTailCallCodeObjectFromJSFunction:
-    case kArchTailCallCodeObject: {
-      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+      if (!HasCallDescriptorFlag(instr, CallDescriptor::kIsTailCallForTierUp)) {
         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                          i.TempRegister(0), i.TempRegister(1),
                                          i.TempRegister(2));
       }
+      V8_FALLTHROUGH;
+    case kArchTailCallCodeObject: {
       if (HasImmediateInput(instr, 0)) {
         Handle<Code> code = i.InputCode(0);
         __ Jump(code, RelocInfo::CODE_TARGET);
@@ -2348,6 +2284,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       int slot = MiscField::decode(instr->opcode());
       if (HasImmediateInput(instr, 0)) {
         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
+      } else if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Movsd(Operand(rsp, slot * kSystemPointerSize),
+                   i.InputDoubleRegister(0));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+          __ Movss(Operand(rsp, slot * kSystemPointerSize),
+                   i.InputFloatRegister(0));
+        }
       } else {
         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
       }
@@ -2419,18 +2365,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister src1 = i.InputSimd128Register(1),
                   dst = i.OutputSimd128Register();
       DCHECK_EQ(dst, i.InputSimd128Register(0));
-      // The minpd instruction doesn't propagate NaNs and -0's in its first
-      // operand. Perform minpd in both orders and compare results. Handle the
-      // unlikely case of discrepancies out of line.
+      // The minpd instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform minpd in both orders, merge the results, and adjust.
       __ Movapd(kScratchDoubleReg, src1);
       __ Minpd(kScratchDoubleReg, dst);
       __ Minpd(dst, src1);
-      // Most likely there is no difference and we're done.
-      __ Xorpd(kScratchDoubleReg, dst);
-      __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
-      auto ool = zone()->New<OutOfLineF64x2Min>(this, dst, kScratchDoubleReg);
-      __ j(not_zero, ool->entry());
-      __ bind(ool->exit());
+      // Propagate -0's and NaNs, which may be non-canonical.
+      __ Orpd(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by quieting and clearing the payload.
+      __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
+      __ Orpd(kScratchDoubleReg, dst);
+      __ Psrlq(dst, 13);
+      __ Andnpd(dst, kScratchDoubleReg);
       break;
     }
     case kX64F64x2Max: {
       XMMRegister src1 = i.InputSimd128Register(1),
                   dst = i.OutputSimd128Register();
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       // The maxpd instruction doesn't propagate NaNs and +0's in its first
-      // operand. Perform maxpd in both orders and compare results. Handle the
-      // unlikely case of discrepancies out of line.
+      // operand. Perform maxpd in both orders, merge the results, and adjust.
       __ Movapd(kScratchDoubleReg, src1);
       __ Maxpd(kScratchDoubleReg, dst);
       __ Maxpd(dst, src1);
-      // Most likely there is no difference and we're done.
-      __ Xorpd(kScratchDoubleReg, dst);
-      __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
-      auto ool = zone()->New<OutOfLineF64x2Max>(this, dst, kScratchDoubleReg);
-      __ j(not_zero, ool->entry());
-      __ bind(ool->exit());
+      // Find discrepancies.
+      __ Xorpd(dst, kScratchDoubleReg);
+      // Propagate NaNs, which may be non-canonical.
+      __ Orpd(kScratchDoubleReg, dst);
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
+      __ Subpd(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
+      __ Psrlq(dst, 13);
+      __ Andnpd(dst, kScratchDoubleReg);
       break;
     }
     case kX64F64x2Eq: {
@@ -2612,18 +2561,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister src1 = i.InputSimd128Register(1),
                   dst = i.OutputSimd128Register();
       DCHECK_EQ(dst, i.InputSimd128Register(0));
-      // The minps instruction doesn't propagate NaNs and -0's in its first
-      // operand. Perform minps in both orders and compare results. Handle the
-      // unlikely case of discrepancies out of line.
+      // The minps instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform minps in both orders, merge the results, and adjust.
       __ Movaps(kScratchDoubleReg, src1);
       __ Minps(kScratchDoubleReg, dst);
       __ Minps(dst, src1);
-      // Most likely there is no difference and we're done.
-      __ Xorps(kScratchDoubleReg, dst);
-      __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
-      auto ool = zone()->New<OutOfLineF32x4Min>(this, dst, kScratchDoubleReg);
-      __ j(not_zero, ool->entry());
-      __ bind(ool->exit());
+      // Propagate -0's and NaNs, which may be non-canonical.
+      __ Orps(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by quieting and clearing the payload.
+      __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
+      __ Orps(kScratchDoubleReg, dst);
+      __ Psrld(dst, byte{10});
+      __ Andnps(dst, kScratchDoubleReg);
       break;
     }
     case kX64F32x4Max: {
       XMMRegister src1 = i.InputSimd128Register(1),
                   dst = i.OutputSimd128Register();
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       // The maxps instruction doesn't propagate NaNs and +0's in its first
-      // operand. Perform maxps in both orders and compare results. Handle the
-      // unlikely case of discrepancies out of line.
+      // operand. Perform maxps in both orders, merge the results, and adjust.
       __ Movaps(kScratchDoubleReg, src1);
       __ Maxps(kScratchDoubleReg, dst);
       __ Maxps(dst, src1);
-      // Most likely there is no difference and we're done.
-      __ Xorps(kScratchDoubleReg, dst);
-      __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
-      auto ool = zone()->New<OutOfLineF32x4Max>(this, dst, kScratchDoubleReg);
-      __ j(not_zero, ool->entry());
-      __ bind(ool->exit());
+      // Find discrepancies.
+      __ Xorps(dst, kScratchDoubleReg);
+      // Propagate NaNs, which may be non-canonical.
+      __ Orps(kScratchDoubleReg, dst);
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
+      __ Subps(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+ __ Cmpps(dst, kScratchDoubleReg, int8_t{3}); + __ Psrld(dst, byte{10}); + __ Andnps(dst, kScratchDoubleReg); break; } case kX64F32x4Eq: { @@ -3724,7 +3676,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Andnps(dst, i.InputSimd128Register(1)); break; } - case kX64S8x16Swizzle: { + case kX64I8x16Swizzle: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); XMMRegister mask = i.TempSimd128Register(0); @@ -3737,7 +3689,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Pshufb(dst, mask); break; } - case kX64S8x16Shuffle: { + case kX64I8x16Shuffle: { XMMRegister dst = i.OutputSimd128Register(); XMMRegister tmp_simd = i.TempSimd128Register(0); if (instr->InputCount() == 5) { // only one input operand @@ -4454,7 +4406,7 @@ static const int kQuadWordSize = 16; } // namespace void CodeGenerator::FinishFrame(Frame* frame) { - auto call_descriptor = linkage()->GetIncomingDescriptor(); + CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor(); const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters(); if (saves_fp != 0) { @@ -4492,9 +4444,6 @@ void CodeGenerator::AssembleConstructFrame() { } } else if (call_descriptor->IsJSFunctionCall()) { __ Prologue(); - if (call_descriptor->PushArgumentCount()) { - __ pushq(kJavaScriptCallArgCountRegister); - } } else { __ StubPrologue(info()->GetOutputStackFrameType()); if (call_descriptor->IsWasmFunctionCall()) { @@ -4610,7 +4559,7 @@ void CodeGenerator::AssembleConstructFrame() { } } -void CodeGenerator::AssembleReturn(InstructionOperand* pop) { +void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) { auto call_descriptor = linkage()->GetIncomingDescriptor(); // Restore registers. @@ -4643,39 +4592,91 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { unwinding_info_writer_.MarkBlockWillExit(); - // Might need rcx for scratch if pop_size is too big or if there is a variable - // pop count. + // We might need rcx and rdx for scratch. DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit()); DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit()); - size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize; + int parameter_count = + static_cast<int>(call_descriptor->StackParameterCount()); X64OperandConverter g(this, nullptr); + Register pop_reg = additional_pop_count->IsImmediate() + ? rcx + : g.ToRegister(additional_pop_count); + Register scratch_reg = pop_reg == rcx ? rdx : rcx; + Register argc_reg = + additional_pop_count->IsImmediate() ? pop_reg : scratch_reg; +#ifdef V8_NO_ARGUMENTS_ADAPTOR + // Functions with JS linkage have at least one parameter (the receiver). + // If {parameter_count} == 0, it means it is a builtin with + // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping + // itself. + const bool drop_jsargs = frame_access_state()->has_frame() && + call_descriptor->IsJSFunctionCall() && + parameter_count != 0; +#else + const bool drop_jsargs = false; +#endif if (call_descriptor->IsCFunctionCall()) { AssembleDeconstructFrame(); } else if (frame_access_state()->has_frame()) { - if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) { + if (additional_pop_count->IsImmediate() && + g.ToConstant(additional_pop_count).ToInt32() == 0) { // Canonicalize JSFunction return sites for now. 
if (return_label_.is_bound()) {
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
-       AssembleDeconstructFrame();
      }
-    } else {
-      AssembleDeconstructFrame();
    }
+    if (drop_jsargs) {
+      // Get the actual argument count.
+      __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
   }
-  if (pop->IsImmediate()) {
-    pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
-    CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
-    __ Ret(static_cast<int>(pop_size), rcx);
+  if (drop_jsargs) {
+    // In addition to the slots given by {additional_pop_count}, we must pop all
+    // arguments from the stack (including the receiver). This number of
+    // arguments is given by max(1 + argc_reg, parameter_count).
+    Label argc_reg_has_final_count;
+    // Exclude the receiver to simplify the computation. We'll account for it at
+    // the end.
+    int parameter_count_without_receiver = parameter_count - 1;
+    if (parameter_count_without_receiver != 0) {
+      __ cmpq(argc_reg, Immediate(parameter_count_without_receiver));
+      __ j(greater_equal, &argc_reg_has_final_count, Label::kNear);
+      __ movq(argc_reg, Immediate(parameter_count_without_receiver));
+      __ bind(&argc_reg_has_final_count);
+    }
+    // Add additional pop count.
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(pop_reg, argc_reg);
+      int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+      if (additional_count != 0) {
+        __ addq(pop_reg, Immediate(additional_count));
+      }
+    } else {
+      __ addq(pop_reg, argc_reg);
+    }
+    __ PopReturnAddressTo(scratch_reg);
+    __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
+                         kSystemPointerSize));  // Also pop the receiver.
+    // We use a return instead of a jump for better return address prediction.
+    __ PushReturnAddressFrom(scratch_reg);
+    __ Ret();
+  } else if (additional_pop_count->IsImmediate()) {
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    size_t pop_size = (parameter_count + additional_count) * kSystemPointerSize;
+    CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
+    __ Ret(static_cast<int>(pop_size), scratch_reg);
   } else {
-    Register pop_reg = g.ToRegister(pop);
-    Register scratch_reg = pop_reg == rcx ?
rdx : rcx; - __ popq(scratch_reg); - __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size))); - __ jmp(scratch_reg); + int pop_size = static_cast<int>(parameter_count * kSystemPointerSize); + __ PopReturnAddressTo(scratch_reg); + __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size, + static_cast<int>(pop_size))); + __ PushReturnAddressFrom(scratch_reg); + __ Ret(); } } @@ -4923,15 +4924,10 @@ void CodeGenerator::AssembleSwap(InstructionOperand* source, case MoveType::kRegisterToStack: { if (source->IsRegister()) { Register src = g.ToRegister(source); - __ pushq(src); - frame_access_state()->IncreaseSPDelta(1); - unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), - kSystemPointerSize); - __ movq(src, g.ToOperand(destination)); - frame_access_state()->IncreaseSPDelta(-1); - __ popq(g.ToOperand(destination)); - unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), - -kSystemPointerSize); + Operand dst = g.ToOperand(destination); + __ movq(kScratchRegister, src); + __ movq(src, dst); + __ movq(dst, kScratchRegister); } else { DCHECK(source->IsFPRegister()); XMMRegister src = g.ToDoubleRegister(source); diff --git a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h index 21f718b315..7312121a0a 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h +++ b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h @@ -325,8 +325,8 @@ namespace compiler { V(X64S128Xor) \ V(X64S128Select) \ V(X64S128AndNot) \ - V(X64S8x16Swizzle) \ - V(X64S8x16Shuffle) \ + V(X64I8x16Swizzle) \ + V(X64I8x16Shuffle) \ V(X64S8x16LoadSplat) \ V(X64S16x8LoadSplat) \ V(X64S32x4LoadSplat) \ diff --git a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc index 56ca9f1dc1..169753b40e 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -303,8 +303,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64V32x4AllTrue: case kX64V16x8AnyTrue: case kX64V16x8AllTrue: - case kX64S8x16Swizzle: - case kX64S8x16Shuffle: + case kX64I8x16Swizzle: + case kX64I8x16Shuffle: case kX64S32x4Swizzle: case kX64S32x4Shuffle: case kX64S16x8Blend: diff --git a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc index 7df4fcbebd..db212677ea 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc @@ -1297,9 +1297,9 @@ void InstructionSelector::VisitChangeInt32ToInt64(Node* node) { } } -namespace { - -bool ZeroExtendsWord32ToWord64(Node* node) { +bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) { + X64OperandGenerator g(this); + DCHECK_NE(node->opcode(), IrOpcode::kPhi); switch (node->opcode()) { case IrOpcode::kWord32And: case IrOpcode::kWord32Or: @@ -1353,13 +1353,20 @@ bool ZeroExtendsWord32ToWord64(Node* node) { return false; } } + case IrOpcode::kInt32Constant: + case IrOpcode::kInt64Constant: + // Constants are loaded with movl or movq, or xorl for zero; see + // CodeGenerator::AssembleMove. So any non-negative constant that fits + // in a 32-bit signed integer is zero-extended to 64 bits. 
+    if (g.CanBeImmediate(node)) {
+      return g.GetImmediateIntegerValue(node) >= 0;
+    }
+    return false;
    default:
      return false;
  }
}

-}  // namespace
-
 void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
   X64OperandGenerator g(this);
   Node* value = node->InputAt(0);
@@ -2271,7 +2278,12 @@ void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
              value_operand, g.TempImmediate(-sw.min_value()));
       } else {
         // Zero extend, because we use it as 64-bit index into the jump table.
-        Emit(kX64Movl, index_operand, value_operand);
+        if (ZeroExtendsWord32ToWord64(node->InputAt(0))) {
+          // Input value has already been zero-extended.
+          index_operand = value_operand;
+        } else {
+          Emit(kX64Movl, index_operand, value_operand);
+        }
       }
       // Generate a table lookup.
       return EmitTableSwitch(sw, index_operand);
@@ -3257,7 +3269,7 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,

 }  // namespace

-void InstructionSelector::VisitS8x16Shuffle(Node* node) {
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
   uint8_t shuffle[kSimd128Size];
   bool is_swizzle;
   CanonicalizeShuffle(node, shuffle, &is_swizzle);
@@ -3273,9 +3285,12 @@
   // Swizzles don't generally need DefineSameAsFirst to avoid a move.
   bool no_same_as_first = is_swizzle;
   // We generally need UseRegister for input0, Use for input1.
+  // TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet; we
+  // retain this logic (and continue setting these in the various shuffle match
+  // clauses) but ignore it when selecting registers or slots.
   bool src0_needs_reg = true;
   bool src1_needs_reg = false;
-  ArchOpcode opcode = kX64S8x16Shuffle;  // general shuffle is the default
+  ArchOpcode opcode = kX64I8x16Shuffle;  // general shuffle is the default

   uint8_t offset;
   uint8_t shuffle32x4[4];
@@ -3365,7 +3380,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
       src0_needs_reg = true;
       imms[imm_count++] = index;
   }
-  if (opcode == kX64S8x16Shuffle) {
+  if (opcode == kX64I8x16Shuffle) {
     // Use same-as-first for general swizzle, but not shuffle.
     no_same_as_first = !is_swizzle;
     src0_needs_reg = !no_same_as_first;
@@ -3381,16 +3396,18 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
   Node* input0 = node->InputAt(0);
   InstructionOperand dst =
       no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
-  InstructionOperand src0 =
-      src0_needs_reg ? g.UseUniqueRegister(input0) : g.UseUnique(input0);
+  // TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD.
+  InstructionOperand src0 = g.UseUniqueRegister(input0);
+  USE(src0_needs_reg);

   int input_count = 0;
   InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
   inputs[input_count++] = src0;
   if (!is_swizzle) {
     Node* input1 = node->InputAt(1);
-    inputs[input_count++] =
-        src1_needs_reg ? g.UseUniqueRegister(input1) : g.UseUnique(input1);
+    // TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD.
+ inputs[input_count++] = g.UseUniqueRegister(input1); + USE(src1_needs_reg); } for (int i = 0; i < imm_count; ++i) { inputs[input_count++] = g.UseImmediate(imms[i]); @@ -3398,10 +3415,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps); } -void InstructionSelector::VisitS8x16Swizzle(Node* node) { +void InstructionSelector::VisitI8x16Swizzle(Node* node) { X64OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register()}; - Emit(kX64S8x16Swizzle, g.DefineSameAsFirst(node), + Emit(kX64I8x16Swizzle, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } |
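A closing note on the inlined F32x4/F64x2 Min and Max sequences earlier in this diff: the Psrld-by-10 and Psrlq-by-13 shifts work because a float32 has 1 sign bit + 8 exponent bits + 1 quiet-NaN bit above the payload (10 bits; for float64 it is 1 + 11 + 1 = 13). A scalar sketch of one 32-bit lane, for illustration only (CanonicalizeF32Lane is a hypothetical name, not V8 code):

#include <cstdint>
#include <cstdio>

// One 32-bit lane of the inlined F32x4Min tail: `merged` holds the OR of
// minps computed in both operand orders; `unordered` is the CMPPS
// (predicate 3) outcome for the lane.
uint32_t CanonicalizeF32Lane(uint32_t merged, bool unordered) {
  uint32_t cmp = unordered ? ~0u : 0u;  // Cmpps(dst, scratch, int8_t{3})
  merged |= cmp;                        // Orps: quiet the NaN lane
  uint32_t payload_mask = cmp >> 10;    // Psrld(dst, byte{10})
  return ~payload_mask & merged;        // Andnps(dst, scratch)
}

int main() {
  // A NaN lane collapses to 0xFFC00000: a quiet NaN with the payload
  // cleared (the sign bit is non-deterministic, as the patch comments note).
  printf("%08X\n", CanonicalizeF32Lane(0x7FA00123u, true));   // FFC00000
  // An ordinary lane (here 1.0f) passes through untouched.
  printf("%08X\n", CanonicalizeF32Lane(0x3F800000u, false));  // 3F800000
  return 0;
}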