author      Allan Sandfeld Jensen <allan.jensen@qt.io>   2020-10-12 14:27:29 +0200
committer   Allan Sandfeld Jensen <allan.jensen@qt.io>   2020-10-13 09:35:20 +0000
commit      c30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
tree        e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/v8/src/compiler/backend
parent      7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
download    qtwebengine-chromium-85-based.tar.gz
BASELINE: Update Chromium to 85.0.4183.140 (85-based)
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/v8/src/compiler/backend')
38 files changed, 2331 insertions, 567 deletions
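Much of the diff below wires up the new WebAssembly SIMD pseudo-minimum/maximum (pmin/pmax) lowerings as a compare followed by a select of rhs, as the in-line comments state ("dst = rhs < lhs ? rhs : lhs", "v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))"). As a rough scalar sketch of those per-lane semantics only — not code from this commit, and the Pmin/Pmax helper names are made up for illustration:

#include <array>
#include <cstddef>
#include <cstdio>

// Scalar model of pmin/pmax: start from lhs and move rhs in only when the
// strict comparison holds, mirroring the "compare, then select rhs" codegen.
template <typename T, std::size_t N>
std::array<T, N> Pmin(std::array<T, N> lhs, const std::array<T, N>& rhs) {
  for (std::size_t i = 0; i < N; ++i) {
    if (rhs[i] < lhs[i]) lhs[i] = rhs[i];  // move rhs only when strictly lesser
  }
  return lhs;  // NaN or an equal compare (-0.0 vs 0.0) keeps lhs
}

template <typename T, std::size_t N>
std::array<T, N> Pmax(std::array<T, N> lhs, const std::array<T, N>& rhs) {
  for (std::size_t i = 0; i < N; ++i) {
    if (lhs[i] < rhs[i]) lhs[i] = rhs[i];  // move rhs only when strictly greater
  }
  return lhs;
}

int main() {
  std::array<double, 2> a{-0.0, 1.0};
  std::array<double, 2> b{0.0, 2.0};
  std::array<double, 2> lo = Pmin(a, b);
  std::array<double, 2> hi = Pmax(a, b);
  std::printf("pmin: %g %g  pmax: %g %g\n", lo[0], lo[1], hi[0], hi[1]);
  // prints: pmin: -0 1  pmax: -0 2  (lane 0 keeps lhs because -0.0 < 0.0 is false)
  return 0;
}

Because a NaN or a -0.0/0.0 pair simply makes the strict comparison false and leaves lhs in place, no separate NaN handling is needed, which is why the generated sequences can be a single compare plus a conditional move or bitselect.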
diff --git a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc index d453cf0188d..f50c0c858a7 100644 --- a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc +++ b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc @@ -1456,7 +1456,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kArmVrintmF32: { CpuFeatureScope scope(tasm(), ARMv8); - __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0)); + if (instr->InputAt(0)->IsSimd128Register()) { + __ vrintm(NeonS32, i.OutputSimd128Register(), + i.InputSimd128Register(0)); + } else { + __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0)); + } break; } case kArmVrintmF64: { @@ -1466,7 +1471,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintpF32: { CpuFeatureScope scope(tasm(), ARMv8); - __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0)); + if (instr->InputAt(0)->IsSimd128Register()) { + __ vrintp(NeonS32, i.OutputSimd128Register(), + i.InputSimd128Register(0)); + } else { + __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0)); + } break; } case kArmVrintpF64: { @@ -1476,7 +1486,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmVrintzF32: { CpuFeatureScope scope(tasm(), ARMv8); - __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0)); + if (instr->InputAt(0)->IsSimd128Register()) { + __ vrintz(NeonS32, i.OutputSimd128Register(), + i.InputSimd128Register(0)); + } else { + __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0)); + } break; } case kArmVrintzF64: { @@ -1960,43 +1975,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArmF64x2Lt: { UseScratchRegisterScope temps(tasm()); Register scratch = temps.Acquire(); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), i.InputSimd128Register(1).low()); - __ mov(scratch, Operand(-1), LeaveCC, lt); - // Check for NaN. - __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, cs); + __ mov(scratch, Operand(-1), LeaveCC, mi); __ vmov(i.OutputSimd128Register().low(), scratch, scratch); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), i.InputSimd128Register(1).high()); - __ mov(scratch, Operand(-1), LeaveCC, lt); - // Check for NaN. - __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, cs); + __ mov(scratch, Operand(-1), LeaveCC, mi); __ vmov(i.OutputSimd128Register().high(), scratch, scratch); break; } case kArmF64x2Le: { UseScratchRegisterScope temps(tasm()); Register scratch = temps.Acquire(); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(), i.InputSimd128Register(1).low()); - __ mov(scratch, Operand(-1), LeaveCC, le); - // Check for NaN. - __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, hi); + __ mov(scratch, Operand(-1), LeaveCC, ls); __ vmov(i.OutputSimd128Register().low(), scratch, scratch); - __ mov(scratch, Operand(0)); __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(), i.InputSimd128Register(1).high()); - __ mov(scratch, Operand(-1), LeaveCC, le); - // Check for NaN. 
- __ mov(scratch, Operand(0), LeaveCC, vs); + __ mov(scratch, Operand(0), LeaveCC, hi); + __ mov(scratch, Operand(-1), LeaveCC, ls); __ vmov(i.OutputSimd128Register().high(), scratch, scratch); break; } + case kArmF64x2Pmin: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_EQ(dst, lhs); + + // Move rhs only when rhs is strictly lesser (mi). + __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); + __ vmov(dst.low(), rhs.low(), mi); + __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); + __ vmov(dst.high(), rhs.high(), mi); + break; + } + case kArmF64x2Pmax: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_EQ(dst, lhs); + + // Move rhs only when rhs is strictly greater (gt). + __ VFPCompareAndSetFlags(rhs.low(), lhs.low()); + __ vmov(dst.low(), rhs.low(), gt); + __ VFPCompareAndSetFlags(rhs.high(), lhs.high()); + __ vmov(dst.high(), rhs.high(), gt); + break; + } case kArmI64x2SplatI32Pair: { Simd128Register dst = i.OutputSimd128Register(); __ vdup(Neon32, dst, i.InputRegister(0)); @@ -2068,7 +2101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kArmI64x2Neg: { Simd128Register dst = i.OutputSimd128Register(); - __ vmov(dst, static_cast<uint64_t>(0)); + __ vmov(dst, uint64_t{0}); __ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0)); break; } @@ -2220,6 +2253,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(0)); break; } + case kArmF32x4Pmin: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_NE(dst, lhs); + DCHECK_NE(dst, rhs); + + // f32x4.pmin(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs)) + // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs)) + __ vcgt(dst, lhs, rhs); + __ vbsl(dst, rhs, lhs); + break; + } + case kArmF32x4Pmax: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + DCHECK_NE(dst, lhs); + DCHECK_NE(dst, rhs); + + // f32x4.pmax(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs)) + __ vcgt(dst, rhs, lhs); + __ vbsl(dst, rhs, lhs); + break; + } case kArmI32x4Splat: { __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0)); break; @@ -2361,8 +2421,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshr(NeonS32, tmp2, src, 31); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. - __ vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001)); - __ vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004)); + __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001})); + __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004})); __ vand(tmp2, mask, tmp2); __ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero); @@ -2538,8 +2598,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshr(NeonS16, tmp2, src, 15); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001)); - __ vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010)); + __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001})); + __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010})); __ vand(tmp2, mask, tmp2); __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low()); @@ -2692,8 +2752,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vshr(NeonS8, tmp2, src, 7); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. - __ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201)); - __ vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201)); + __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201})); + __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201})); __ vand(tmp2, mask, tmp2); __ vext(mask, tmp2, tmp2, 8); __ vzip(Neon8, mask, tmp2); @@ -3028,7 +3088,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } - case kArmS1x4AnyTrue: { + case kArmV32x4AnyTrue: + case kArmV16x8AnyTrue: + case kArmV8x16AnyTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); @@ -3039,7 +3101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); break; } - case kArmS1x4AllTrue: { + case kArmV32x4AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); @@ -3050,19 +3112,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); break; } - case kArmS1x8AnyTrue: { - const QwNeonRegister& src = i.InputSimd128Register(0); - UseScratchRegisterScope temps(tasm()); - DwVfpRegister scratch = temps.AcquireD(); - __ vpmax(NeonU16, scratch, src.low(), src.high()); - __ vpmax(NeonU16, scratch, scratch, scratch); - __ vpmax(NeonU16, scratch, scratch, scratch); - __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0); - __ cmp(i.OutputRegister(), Operand(0)); - __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); - break; - } - case kArmS1x8AllTrue: { + case kArmV16x8AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); @@ -3074,23 +3124,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); break; } - case kArmS1x16AnyTrue: { - const QwNeonRegister& src = i.InputSimd128Register(0); - UseScratchRegisterScope temps(tasm()); - QwNeonRegister q_scratch = temps.AcquireQ(); - DwVfpRegister d_scratch = q_scratch.low(); - __ vpmax(NeonU8, d_scratch, src.low(), src.high()); - __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch); - // vtst to detect any bits in the bottom 32 bits of d_scratch. - // This saves an instruction vs. the naive sequence of vpmax. - // kDoubleRegZero is not changed, since it is 0. 
- __ vtst(Neon32, q_scratch, q_scratch, q_scratch); - __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0); - __ cmp(i.OutputRegister(), Operand(0)); - __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); - break; - } - case kArmS1x16AllTrue: { + case kArmV8x16AllTrue: { const QwNeonRegister& src = i.InputSimd128Register(0); UseScratchRegisterScope temps(tasm()); DwVfpRegister scratch = temps.AcquireD(); diff --git a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h index c6365bf7a50..39ed658fc4b 100644 --- a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h +++ b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h @@ -144,6 +144,8 @@ namespace compiler { V(ArmF64x2Ne) \ V(ArmF64x2Lt) \ V(ArmF64x2Le) \ + V(ArmF64x2Pmin) \ + V(ArmF64x2Pmax) \ V(ArmF32x4Splat) \ V(ArmF32x4ExtractLane) \ V(ArmF32x4ReplaceLane) \ @@ -165,6 +167,8 @@ namespace compiler { V(ArmF32x4Ne) \ V(ArmF32x4Lt) \ V(ArmF32x4Le) \ + V(ArmF32x4Pmin) \ + V(ArmF32x4Pmax) \ V(ArmI64x2SplatI32Pair) \ V(ArmI64x2ReplaceLaneI32Pair) \ V(ArmI64x2Neg) \ @@ -304,12 +308,12 @@ namespace compiler { V(ArmS8x8Reverse) \ V(ArmS8x4Reverse) \ V(ArmS8x2Reverse) \ - V(ArmS1x4AnyTrue) \ - V(ArmS1x4AllTrue) \ - V(ArmS1x8AnyTrue) \ - V(ArmS1x8AllTrue) \ - V(ArmS1x16AnyTrue) \ - V(ArmS1x16AllTrue) \ + V(ArmV32x4AnyTrue) \ + V(ArmV32x4AllTrue) \ + V(ArmV16x8AnyTrue) \ + V(ArmV16x8AllTrue) \ + V(ArmV8x16AnyTrue) \ + V(ArmV8x16AllTrue) \ V(ArmS8x16LoadSplat) \ V(ArmS16x8LoadSplat) \ V(ArmS32x4LoadSplat) \ diff --git a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc index 8c09acd6df8..196aa1ce6c0 100644 --- a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc +++ b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc @@ -124,6 +124,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF64x2Ne: case kArmF64x2Lt: case kArmF64x2Le: + case kArmF64x2Pmin: + case kArmF64x2Pmax: case kArmF32x4Splat: case kArmF32x4ExtractLane: case kArmF32x4ReplaceLane: @@ -145,6 +147,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF32x4Ne: case kArmF32x4Lt: case kArmF32x4Le: + case kArmF32x4Pmin: + case kArmF32x4Pmax: case kArmI64x2SplatI32Pair: case kArmI64x2ReplaceLaneI32Pair: case kArmI64x2Neg: @@ -284,12 +288,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmS8x8Reverse: case kArmS8x4Reverse: case kArmS8x2Reverse: - case kArmS1x4AnyTrue: - case kArmS1x4AllTrue: - case kArmS1x8AnyTrue: - case kArmS1x8AllTrue: - case kArmS1x16AnyTrue: - case kArmS1x16AllTrue: + case kArmV32x4AnyTrue: + case kArmV32x4AllTrue: + case kArmV16x8AnyTrue: + case kArmV16x8AllTrue: + case kArmV8x16AnyTrue: + case kArmV8x16AllTrue: return kNoOpcodeFlags; case kArmVldrF32: diff --git a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc index 74658697b50..de0e7c4162c 100644 --- a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc +++ b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc @@ -1495,7 +1495,10 @@ void InstructionSelector::VisitUint32Mod(Node* node) { V(Float64RoundTruncate, kArmVrintzF64) \ V(Float64RoundTiesAway, kArmVrintaF64) \ V(Float32RoundTiesEven, kArmVrintnF32) \ - V(Float64RoundTiesEven, kArmVrintnF64) + V(Float64RoundTiesEven, kArmVrintnF64) \ + V(F32x4Ceil, kArmVrintpF32) \ + V(F32x4Floor, kArmVrintmF32) \ + 
V(F32x4Trunc, kArmVrintzF32) #define RRR_OP_LIST(V) \ V(Int32MulHigh, kArmSmmul) \ @@ -2525,12 +2528,12 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(I8x16Neg, kArmI8x16Neg) \ V(I8x16Abs, kArmI8x16Abs) \ V(S128Not, kArmS128Not) \ - V(S1x4AnyTrue, kArmS1x4AnyTrue) \ - V(S1x4AllTrue, kArmS1x4AllTrue) \ - V(S1x8AnyTrue, kArmS1x8AnyTrue) \ - V(S1x8AllTrue, kArmS1x8AllTrue) \ - V(S1x16AnyTrue, kArmS1x16AnyTrue) \ - V(S1x16AllTrue, kArmS1x16AllTrue) + V(V32x4AnyTrue, kArmV32x4AnyTrue) \ + V(V32x4AllTrue, kArmV32x4AllTrue) \ + V(V16x8AnyTrue, kArmV16x8AnyTrue) \ + V(V16x8AllTrue, kArmV16x8AllTrue) \ + V(V8x16AnyTrue, kArmV8x16AnyTrue) \ + V(V8x16AllTrue, kArmV8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ V(I64x2Shl, 64) \ @@ -2941,6 +2944,42 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { VisitBitMask<kArmI32x4BitMask>(this, node); } +namespace { +void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + ArmOperandGenerator g(selector); + // Need all unique registers because we first compare the two inputs, then we + // need the inputs to remain unchanged for the bitselect later. + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); +} + +void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + ArmOperandGenerator g(selector); + selector->Emit(opcode, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1))); +} +} // namespace + +void InstructionSelector::VisitF32x4Pmin(Node* node) { + VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node); +} + +void InstructionSelector::VisitF32x4Pmax(Node* node) { + VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node); +} + +void InstructionSelector::VisitF64x2Pmin(Node* node) { + VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node); +} + +void InstructionSelector::VisitF64x2Pmax(Node* node) { + VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc index 4cf19a5d802..d21440c35b3 100644 --- a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc +++ b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc @@ -502,8 +502,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode, __ asm_imm(i.OutputSimd128Register().format(), \ i.InputSimd128Register(0).format(), i.InputInt##width(1)); \ } else { \ - VRegister tmp = i.TempSimd128Register(0); \ - Register shift = i.TempRegister(1).gp(); \ + UseScratchRegisterScope temps(tasm()); \ + VRegister tmp = temps.AcquireQ(); \ + Register shift = temps.Acquire##gp(); \ constexpr int mask = (1 << width) - 1; \ __ And(shift, i.InputRegister32(1), mask); \ __ Dup(tmp.format(), shift); \ @@ -521,8 +522,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode, __ asm_imm(i.OutputSimd128Register().format(), \ i.InputSimd128Register(0).format(), i.InputInt##width(1)); \ } else { \ - VRegister tmp = i.TempSimd128Register(0); \ - Register shift = i.TempRegister(1).gp(); \ + UseScratchRegisterScope temps(tasm()); \ + VRegister tmp = temps.AcquireQ(); \ + Register shift = temps.Acquire##gp(); \ constexpr int mask = (1 << width) - 1; \ __ And(shift, i.InputRegister32(1), mask); \ __ Dup(tmp.format(), shift); \ @@ -1901,6 
+1903,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D); SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D); + case kArm64F64x2Pmin: { + VRegister dst = i.OutputSimd128Register().V2D(); + VRegister lhs = i.InputSimd128Register(0).V2D(); + VRegister rhs = i.InputSimd128Register(1).V2D(); + // f64x2.pmin(lhs, rhs) + // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs)) + // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs)) + __ Fcmgt(dst, lhs, rhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F64x2Pmax: { + VRegister dst = i.OutputSimd128Register().V2D(); + VRegister lhs = i.InputSimd128Register(0).V2D(); + VRegister rhs = i.InputSimd128Register(1).V2D(); + // f64x2.pmax(lhs, rhs) + // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs)) + __ Fcmgt(dst, rhs, lhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F64x2RoundUp: + __ Frintp(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; + case kArm64F64x2RoundDown: + __ Frintm(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; + case kArm64F64x2RoundTruncate: + __ Frintz(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; + case kArm64F64x2RoundTiesEven: + __ Frintn(i.OutputSimd128Register().V2D(), + i.InputSimd128Register(0).V2D()); + break; case kArm64F32x4Splat: { __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0); break; @@ -1953,6 +1992,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S); SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S); + case kArm64F32x4Pmin: { + VRegister dst = i.OutputSimd128Register().V4S(); + VRegister lhs = i.InputSimd128Register(0).V4S(); + VRegister rhs = i.InputSimd128Register(1).V4S(); + // f32x4.pmin(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs)) + // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs)) + __ Fcmgt(dst, lhs, rhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F32x4Pmax: { + VRegister dst = i.OutputSimd128Register().V4S(); + VRegister lhs = i.InputSimd128Register(0).V4S(); + VRegister rhs = i.InputSimd128Register(1).V4S(); + // f32x4.pmax(lhs, rhs) + // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs)) + __ Fcmgt(dst, rhs, lhs); + __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); + break; + } + case kArm64F32x4RoundUp: + __ Frintp(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; + case kArm64F32x4RoundDown: + __ Frintm(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; + case kArm64F32x4RoundTruncate: + __ Frintz(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; + case kArm64F32x4RoundTiesEven: + __ Frintn(i.OutputSimd128Register().V4S(), + i.InputSimd128Register(0).V4S()); + break; case kArm64I64x2Splat: { __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0)); break; @@ -2132,6 +2208,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Mov(dst.W(), tmp.V4S(), 0); break; } + case kArm64I32x4DotI16x8S: { + UseScratchRegisterScope scope(tasm()); + VRegister lhs = i.InputSimd128Register(0); + VRegister rhs = i.InputSimd128Register(1); + VRegister tmp1 = scope.AcquireV(kFormat4S); + VRegister tmp2 = scope.AcquireV(kFormat4S); + __ Smull(tmp1, lhs.V4H(), rhs.V4H()); + __ Smull2(tmp2, lhs.V8H(), rhs.V8H()); + __ Addp(i.OutputSimd128Register().V4S(), 
tmp1, tmp2); + break; + } case kArm64I16x8Splat: { __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0)); break; @@ -2480,7 +2567,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B); SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B); SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B); - case kArm64S1x2AllTrue: { + case kArm64V64x2AllTrue: { UseScratchRegisterScope scope(tasm()); VRegister temp1 = scope.AcquireV(kFormat2D); VRegister temp2 = scope.AcquireV(kFormatS); @@ -2508,32 +2595,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kArm64I16x8Load8x8S: { - __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B()); break; } case kArm64I16x8Load8x8U: { - __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0)); __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B()); break; } case kArm64I32x4Load16x4S: { - __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H()); break; } case kArm64I32x4Load16x4U: { - __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0)); __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H()); break; } case kArm64I64x2Load32x2S: { - __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S()); break; } case kArm64I64x2Load32x2U: { - __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); + __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0)); __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S()); break; } @@ -2548,13 +2635,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; \ } // for AnyTrue, the format does not matter, umaxv does not support 2D - SIMD_REDUCE_OP_CASE(kArm64S1x2AnyTrue, Umaxv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64S1x4AnyTrue, Umaxv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64S1x4AllTrue, Uminv, kFormatS, 4S); - SIMD_REDUCE_OP_CASE(kArm64S1x8AnyTrue, Umaxv, kFormatH, 8H); - SIMD_REDUCE_OP_CASE(kArm64S1x8AllTrue, Uminv, kFormatH, 8H); - SIMD_REDUCE_OP_CASE(kArm64S1x16AnyTrue, Umaxv, kFormatB, 16B); - SIMD_REDUCE_OP_CASE(kArm64S1x16AllTrue, Uminv, kFormatB, 16B); + SIMD_REDUCE_OP_CASE(kArm64V64x2AnyTrue, Umaxv, kFormatS, 4S); + SIMD_REDUCE_OP_CASE(kArm64V32x4AnyTrue, Umaxv, kFormatS, 4S); + SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S); + SIMD_REDUCE_OP_CASE(kArm64V16x8AnyTrue, Umaxv, kFormatH, 8H); + SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H); + SIMD_REDUCE_OP_CASE(kArm64V8x16AnyTrue, Umaxv, kFormatB, 16B); + SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B); } return kSuccess; } // NOLINT(readability/fn_size) diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h index a8e2b52c028..41f9d78550e 100644 --- a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h +++ b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h @@ -186,6 +186,12 @@ namespace 
compiler { V(Arm64F64x2Le) \ V(Arm64F64x2Qfma) \ V(Arm64F64x2Qfms) \ + V(Arm64F64x2Pmin) \ + V(Arm64F64x2Pmax) \ + V(Arm64F64x2RoundUp) \ + V(Arm64F64x2RoundDown) \ + V(Arm64F64x2RoundTruncate) \ + V(Arm64F64x2RoundTiesEven) \ V(Arm64F32x4Splat) \ V(Arm64F32x4ExtractLane) \ V(Arm64F32x4ReplaceLane) \ @@ -209,6 +215,12 @@ namespace compiler { V(Arm64F32x4Le) \ V(Arm64F32x4Qfma) \ V(Arm64F32x4Qfms) \ + V(Arm64F32x4Pmin) \ + V(Arm64F32x4Pmax) \ + V(Arm64F32x4RoundUp) \ + V(Arm64F32x4RoundDown) \ + V(Arm64F32x4RoundTruncate) \ + V(Arm64F32x4RoundTiesEven) \ V(Arm64I64x2Splat) \ V(Arm64I64x2ExtractLane) \ V(Arm64I64x2ReplaceLane) \ @@ -256,6 +268,7 @@ namespace compiler { V(Arm64I32x4GeU) \ V(Arm64I32x4Abs) \ V(Arm64I32x4BitMask) \ + V(Arm64I32x4DotI16x8S) \ V(Arm64I16x8Splat) \ V(Arm64I16x8ExtractLaneU) \ V(Arm64I16x8ExtractLaneS) \ @@ -361,14 +374,14 @@ namespace compiler { V(Arm64S8x8Reverse) \ V(Arm64S8x4Reverse) \ V(Arm64S8x2Reverse) \ - V(Arm64S1x2AnyTrue) \ - V(Arm64S1x2AllTrue) \ - V(Arm64S1x4AnyTrue) \ - V(Arm64S1x4AllTrue) \ - V(Arm64S1x8AnyTrue) \ - V(Arm64S1x8AllTrue) \ - V(Arm64S1x16AnyTrue) \ - V(Arm64S1x16AllTrue) \ + V(Arm64V64x2AnyTrue) \ + V(Arm64V64x2AllTrue) \ + V(Arm64V32x4AnyTrue) \ + V(Arm64V32x4AllTrue) \ + V(Arm64V16x8AnyTrue) \ + V(Arm64V16x8AllTrue) \ + V(Arm64V8x16AnyTrue) \ + V(Arm64V8x16AllTrue) \ V(Arm64S8x16LoadSplat) \ V(Arm64S16x8LoadSplat) \ V(Arm64S32x4LoadSplat) \ diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc index 128ebdac957..3ea84730801 100644 --- a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc +++ b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc @@ -156,6 +156,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F64x2Le: case kArm64F64x2Qfma: case kArm64F64x2Qfms: + case kArm64F64x2Pmin: + case kArm64F64x2Pmax: + case kArm64F64x2RoundUp: + case kArm64F64x2RoundDown: + case kArm64F64x2RoundTruncate: + case kArm64F64x2RoundTiesEven: case kArm64F32x4Splat: case kArm64F32x4ExtractLane: case kArm64F32x4ReplaceLane: @@ -179,6 +185,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64F32x4Le: case kArm64F32x4Qfma: case kArm64F32x4Qfms: + case kArm64F32x4Pmin: + case kArm64F32x4Pmax: + case kArm64F32x4RoundUp: + case kArm64F32x4RoundDown: + case kArm64F32x4RoundTruncate: + case kArm64F32x4RoundTiesEven: case kArm64I64x2Splat: case kArm64I64x2ExtractLane: case kArm64I64x2ReplaceLane: @@ -226,6 +238,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64I32x4GeU: case kArm64I32x4Abs: case kArm64I32x4BitMask: + case kArm64I32x4DotI16x8S: case kArm64I16x8Splat: case kArm64I16x8ExtractLaneU: case kArm64I16x8ExtractLaneS: @@ -331,14 +344,14 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArm64S8x8Reverse: case kArm64S8x4Reverse: case kArm64S8x2Reverse: - case kArm64S1x2AnyTrue: - case kArm64S1x2AllTrue: - case kArm64S1x4AnyTrue: - case kArm64S1x4AllTrue: - case kArm64S1x8AnyTrue: - case kArm64S1x8AllTrue: - case kArm64S1x16AnyTrue: - case kArm64S1x16AllTrue: + case kArm64V64x2AnyTrue: + case kArm64V64x2AllTrue: + case kArm64V32x4AnyTrue: + case kArm64V32x4AllTrue: + case kArm64V16x8AnyTrue: + case kArm64V16x8AllTrue: + case kArm64V8x16AnyTrue: + case kArm64V8x16AllTrue: case kArm64TestAndBranch32: case kArm64TestAndBranch: case kArm64CompareAndBranch32: diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc 
b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc index 06a87a8aab7..2e0d977c3c7 100644 --- a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc +++ b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc @@ -163,13 +163,9 @@ void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, g.UseImmediate(node->InputAt(1))); } } else { - InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()}; - // We only need a unique register for the first input (src), since in - // the codegen we use tmp to store the shifts, and then later use it with - // src. The second input can be the same as the second temp (shift). selector->Emit(opcode, g.DefineAsRegister(node), - g.UseUniqueRegister(node->InputAt(0)), - g.UseRegister(node->InputAt(1)), arraysize(temps), temps); + g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1))); } } @@ -608,18 +604,23 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, void InstructionSelector::VisitLoadTransform(Node* node) { LoadTransformParameters params = LoadTransformParametersOf(node->op()); InstructionCode opcode = kArchNop; + bool require_add = false; switch (params.transformation) { case LoadTransformation::kS8x16LoadSplat: opcode = kArm64S8x16LoadSplat; + require_add = true; break; case LoadTransformation::kS16x8LoadSplat: opcode = kArm64S16x8LoadSplat; + require_add = true; break; case LoadTransformation::kS32x4LoadSplat: opcode = kArm64S32x4LoadSplat; + require_add = true; break; case LoadTransformation::kS64x2LoadSplat: opcode = kArm64S64x2LoadSplat; + require_add = true; break; case LoadTransformation::kI16x8Load8x8S: opcode = kArm64I16x8Load8x8S; @@ -655,13 +656,17 @@ void InstructionSelector::VisitLoadTransform(Node* node) { inputs[1] = g.UseRegister(index); outputs[0] = g.DefineAsRegister(node); - // ld1r uses post-index, so construct address first. - // TODO(v8:9886) If index can be immediate, use vldr without this add. - InstructionOperand addr = g.TempRegister(); - Emit(kArm64Add, 1, &addr, 2, inputs); - inputs[0] = addr; - inputs[1] = g.TempImmediate(0); - opcode |= AddressingModeField::encode(kMode_MRI); + if (require_add) { + // ld1r uses post-index, so construct address first. + // TODO(v8:9886) If index can be immediate, use vldr without this add. 
+ InstructionOperand addr = g.TempRegister(); + Emit(kArm64Add, 1, &addr, 2, inputs); + inputs[0] = addr; + inputs[1] = g.TempImmediate(0); + opcode |= AddressingModeField::encode(kMode_MRI); + } else { + opcode |= AddressingModeField::encode(kMode_MRR); + } Emit(opcode, 1, outputs, 2, inputs); } @@ -1360,7 +1365,15 @@ void InstructionSelector::VisitWord64Ror(Node* node) { V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \ V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \ V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \ - V(Float64SilenceNaN, kArm64Float64SilenceNaN) + V(Float64SilenceNaN, kArm64Float64SilenceNaN) \ + V(F32x4Ceil, kArm64F32x4RoundUp) \ + V(F32x4Floor, kArm64F32x4RoundDown) \ + V(F32x4Trunc, kArm64F32x4RoundTruncate) \ + V(F32x4NearestInt, kArm64F32x4RoundTiesEven) \ + V(F64x2Ceil, kArm64F64x2RoundUp) \ + V(F64x2Floor, kArm64F64x2RoundDown) \ + V(F64x2Trunc, kArm64F64x2RoundTruncate) \ + V(F64x2NearestInt, kArm64F64x2RoundTiesEven) #define RRR_OP_LIST(V) \ V(Int32Div, kArm64Idiv32) \ @@ -3184,14 +3197,14 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I8x16Neg, kArm64I8x16Neg) \ V(I8x16Abs, kArm64I8x16Abs) \ V(S128Not, kArm64S128Not) \ - V(S1x2AnyTrue, kArm64S1x2AnyTrue) \ - V(S1x2AllTrue, kArm64S1x2AllTrue) \ - V(S1x4AnyTrue, kArm64S1x4AnyTrue) \ - V(S1x4AllTrue, kArm64S1x4AllTrue) \ - V(S1x8AnyTrue, kArm64S1x8AnyTrue) \ - V(S1x8AllTrue, kArm64S1x8AllTrue) \ - V(S1x16AnyTrue, kArm64S1x16AnyTrue) \ - V(S1x16AllTrue, kArm64S1x16AllTrue) + V(V64x2AnyTrue, kArm64V64x2AnyTrue) \ + V(V64x2AllTrue, kArm64V64x2AllTrue) \ + V(V32x4AnyTrue, kArm64V32x4AnyTrue) \ + V(V32x4AllTrue, kArm64V32x4AllTrue) \ + V(V16x8AnyTrue, kArm64V16x8AnyTrue) \ + V(V16x8AllTrue, kArm64V16x8AllTrue) \ + V(V8x16AnyTrue, kArm64V8x16AnyTrue) \ + V(V8x16AllTrue, kArm64V8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ V(I64x2Shl, 64) \ @@ -3249,6 +3262,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I32x4MaxU, kArm64I32x4MaxU) \ V(I32x4GtU, kArm64I32x4GtU) \ V(I32x4GeU, kArm64I32x4GeU) \ + V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \ V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \ V(I16x8AddSaturateS, kArm64I16x8AddSaturateS) \ V(I16x8AddHoriz, kArm64I16x8AddHoriz) \ @@ -3613,6 +3627,34 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) { VisitRR(this, kArm64Sxtw, node); } +namespace { +void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + Arm64OperandGenerator g(selector); + // Need all unique registers because we first compare the two inputs, then we + // need the inputs to remain unchanged for the bitselect later. 
+ selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); +} +} // namespace + +void InstructionSelector::VisitF32x4Pmin(Node* node) { + VisitPminOrPmax(this, kArm64F32x4Pmin, node); +} + +void InstructionSelector::VisitF32x4Pmax(Node* node) { + VisitPminOrPmax(this, kArm64F32x4Pmax, node); +} + +void InstructionSelector::VisitF64x2Pmin(Node* node) { + VisitPminOrPmax(this, kArm64F64x2Pmin, node); +} + +void InstructionSelector::VisitF64x2Pmax(Node* node) { + VisitPminOrPmax(this, kArm64F64x2Pmax, node); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/chromium/v8/src/compiler/backend/code-generator.cc b/chromium/v8/src/compiler/backend/code-generator.cc index 72c5750035a..83dccf69e82 100644 --- a/chromium/v8/src/compiler/backend/code-generator.cc +++ b/chromium/v8/src/compiler/backend/code-generator.cc @@ -55,19 +55,20 @@ CodeGenerator::CodeGenerator( frame_access_state_(nullptr), linkage_(linkage), instructions_(instructions), - unwinding_info_writer_(zone()), + unwinding_info_writer_(codegen_zone), info_(info), - labels_(zone()->NewArray<Label>(instructions->InstructionBlockCount())), + labels_( + codegen_zone->NewArray<Label>(instructions->InstructionBlockCount())), current_block_(RpoNumber::Invalid()), start_source_position_(start_source_position), current_source_position_(SourcePosition::Unknown()), tasm_(isolate, options, CodeObjectRequired::kNo, std::move(buffer)), resolver_(this), - safepoints_(zone()), - handlers_(zone()), - deoptimization_exits_(zone()), - deoptimization_literals_(zone()), - translations_(zone()), + safepoints_(codegen_zone), + handlers_(codegen_zone), + deoptimization_exits_(codegen_zone), + deoptimization_literals_(codegen_zone), + translations_(codegen_zone), max_unoptimized_frame_height_(max_unoptimized_frame_height), max_pushed_argument_count_(max_pushed_argument_count), caller_registers_saved_(false), @@ -77,12 +78,12 @@ CodeGenerator::CodeGenerator( osr_pc_offset_(-1), optimized_out_literal_id_(-1), source_position_table_builder_( - SourcePositionTableBuilder::RECORD_SOURCE_POSITIONS), - protected_instructions_(zone()), + codegen_zone, SourcePositionTableBuilder::RECORD_SOURCE_POSITIONS), + protected_instructions_(codegen_zone), result_(kSuccess), poisoning_level_(poisoning_level), - block_starts_(zone()), - instr_starts_(zone()) { + block_starts_(codegen_zone), + instr_starts_(codegen_zone) { for (int i = 0; i < instructions->InstructionBlockCount(); ++i) { new (&labels_[i]) Label; } @@ -161,7 +162,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleDeoptimizerCall( DeoptimizeReason deoptimization_reason = exit->reason(); Address deopt_entry = Deoptimizer::GetDeoptimizationEntry(tasm()->isolate(), deopt_kind); - if (info()->is_source_positions_enabled()) { + if (info()->source_positions()) { tasm()->RecordDeoptReason(deoptimization_reason, exit->pos(), deoptimization_id); } @@ -191,7 +192,7 @@ void CodeGenerator::AssembleCode() { // the frame (that is done in AssemblePrologue). 
FrameScope frame_scope(tasm(), StackFrame::MANUAL); - if (info->is_source_positions_enabled()) { + if (info->source_positions()) { AssembleSourcePosition(start_source_position()); } offsets_info_.code_start_register_check = tasm()->pc_offset(); @@ -242,7 +243,7 @@ void CodeGenerator::AssembleCode() { unwinding_info_writer_.SetNumberOfInstructionBlocks( instructions()->InstructionBlockCount()); - if (info->trace_turbo_json_enabled()) { + if (info->trace_turbo_json()) { block_starts_.assign(instructions()->instruction_blocks().size(), -1); instr_starts_.assign(instructions()->instructions().size(), {}); } @@ -253,7 +254,7 @@ void CodeGenerator::AssembleCode() { if (block->ShouldAlign() && !tasm()->jump_optimization_info()) { tasm()->CodeTargetAlign(); } - if (info->trace_turbo_json_enabled()) { + if (info->trace_turbo_json()) { block_starts_[block->rpo_number().ToInt()] = tasm()->pc_offset(); } // Bind a label for a block. @@ -503,6 +504,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() { .set_deoptimization_data(deopt_data) .set_is_turbofanned() .set_stack_slots(frame()->GetTotalFrameSlotCount()) + .set_profiler_data(info()->profiler_data()) .TryBuild(); Handle<Code> code; @@ -721,7 +723,7 @@ RpoNumber CodeGenerator::ComputeBranchInfo(BranchInfo* branch, CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction( int instruction_index, const InstructionBlock* block) { Instruction* instr = instructions()->InstructionAt(instruction_index); - if (info()->trace_turbo_json_enabled()) { + if (info()->trace_turbo_json()) { instr_starts_[instruction_index].gap_pc_offset = tasm()->pc_offset(); } int first_unused_stack_slot; @@ -741,14 +743,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction( if (instr->IsJump() && block->must_deconstruct_frame()) { AssembleDeconstructFrame(); } - if (info()->trace_turbo_json_enabled()) { + if (info()->trace_turbo_json()) { instr_starts_[instruction_index].arch_instr_pc_offset = tasm()->pc_offset(); } // Assemble architecture-specific code for the instruction. CodeGenResult result = AssembleArchInstruction(instr); if (result != kSuccess) return result; - if (info()->trace_turbo_json_enabled()) { + if (info()->trace_turbo_json()) { instr_starts_[instruction_index].condition_pc_offset = tasm()->pc_offset(); } @@ -832,7 +834,7 @@ void CodeGenerator::AssembleSourcePosition(SourcePosition source_position) { buffer << "-- "; // Turbolizer only needs the source position, as it can reconstruct // the inlining stack from other information. 
- if (info->trace_turbo_json_enabled() || !tasm()->isolate() || + if (info->trace_turbo_json() || !tasm()->isolate() || tasm()->isolate()->concurrent_recompilation_enabled()) { buffer << source_position; } else { @@ -979,7 +981,8 @@ void CodeGenerator::RecordCallPosition(Instruction* instr) { InstructionOperandConverter i(this, instr); RpoNumber handler_rpo = i.InputRpo(instr->InputCount() - 1); DCHECK(instructions()->InstructionBlockAt(handler_rpo)->IsHandler()); - handlers_.push_back({GetLabel(handler_rpo), tasm()->pc_offset()}); + handlers_.push_back( + {GetLabel(handler_rpo), tasm()->pc_offset_for_safepoint()}); } if (needs_frame_state) { @@ -989,7 +992,7 @@ void CodeGenerator::RecordCallPosition(Instruction* instr) { size_t frame_state_offset = 2; FrameStateDescriptor* descriptor = GetDeoptimizationEntry(instr, frame_state_offset).descriptor(); - int pc_offset = tasm()->pc_offset(); + int pc_offset = tasm()->pc_offset_for_safepoint(); BuildTranslation(instr, pc_offset, frame_state_offset, descriptor->state_combine()); } @@ -1329,7 +1332,7 @@ void CodeGenerator::InitializeSpeculationPoison() { if (info()->called_with_code_start_register()) { tasm()->RecordComment("-- Prologue: generate speculation poison --"); GenerateSpeculationPoisonFromCodeStartRegister(); - if (info()->is_poisoning_register_arguments()) { + if (info()->poison_register_arguments()) { AssembleRegisterArgumentPoisoning(); } } else { diff --git a/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc index c673458c753..f5a69eec3ea 100644 --- a/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc @@ -2032,6 +2032,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Maxpd(dst, dst, i.InputSimd128Register(1)); break; } + case kIA32F64x2Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode); + break; + } case kIA32I64x2SplatI32Pair: { XMMRegister dst = i.OutputSimd128Register(); __ Pinsrd(dst, i.InputRegister(0), 0); @@ -2442,6 +2448,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Maxps(dst, dst, i.InputSimd128Register(1)); break; } + case kIA32F32x4Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode); + break; + } case kIA32I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); @@ -2795,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0)); break; } + case kIA32I32x4DotI16x8S: { + __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } case kIA32I16x8Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); @@ -3687,7 +3704,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // Out-of-range indices should return 0, add 112 so that any value > 15 // saturates to 128 (top bit set), so pshufb will zero that lane. 
- __ Move(mask, (uint32_t)0x70707070); + __ Move(mask, uint32_t{0x70707070}); __ Pshufd(mask, mask, 0x0); __ Paddusb(mask, i.InputSimd128Register(1)); __ Pshufb(dst, mask); @@ -4094,9 +4111,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vpor(dst, dst, kScratchDoubleReg); break; } - case kIA32S1x4AnyTrue: - case kIA32S1x8AnyTrue: - case kIA32S1x16AnyTrue: { + case kIA32V32x4AnyTrue: + case kIA32V16x8AnyTrue: + case kIA32V8x16AnyTrue: { Register dst = i.OutputRegister(); XMMRegister src = i.InputSimd128Register(0); Register tmp = i.TempRegister(0); @@ -4110,13 +4127,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1 // respectively. - case kIA32S1x4AllTrue: + case kIA32V32x4AllTrue: ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd); break; - case kIA32S1x8AllTrue: + case kIA32V16x8AllTrue: ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw); break; - case kIA32S1x16AllTrue: { + case kIA32V8x16AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb); break; } diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h index d347d672021..4c49539c4e9 100644 --- a/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -136,6 +136,7 @@ namespace compiler { V(IA32F64x2Le) \ V(IA32F64x2Pmin) \ V(IA32F64x2Pmax) \ + V(IA32F64x2Round) \ V(IA32I64x2SplatI32Pair) \ V(IA32I64x2ReplaceLaneI32Pair) \ V(IA32I64x2Neg) \ @@ -186,6 +187,7 @@ namespace compiler { V(AVXF32x4Le) \ V(IA32F32x4Pmin) \ V(IA32F32x4Pmax) \ + V(IA32F32x4Round) \ V(IA32I32x4Splat) \ V(IA32I32x4ExtractLane) \ V(SSEI32x4ReplaceLane) \ @@ -232,6 +234,7 @@ namespace compiler { V(AVXI32x4GeU) \ V(IA32I32x4Abs) \ V(IA32I32x4BitMask) \ + V(IA32I32x4DotI16x8S) \ V(IA32I16x8Splat) \ V(IA32I16x8ExtractLaneU) \ V(IA32I16x8ExtractLaneS) \ @@ -396,12 +399,12 @@ namespace compiler { V(AVXS8x4Reverse) \ V(SSES8x2Reverse) \ V(AVXS8x2Reverse) \ - V(IA32S1x4AnyTrue) \ - V(IA32S1x4AllTrue) \ - V(IA32S1x8AnyTrue) \ - V(IA32S1x8AllTrue) \ - V(IA32S1x16AnyTrue) \ - V(IA32S1x16AllTrue) \ + V(IA32V32x4AnyTrue) \ + V(IA32V32x4AllTrue) \ + V(IA32V16x8AnyTrue) \ + V(IA32V16x8AllTrue) \ + V(IA32V8x16AnyTrue) \ + V(IA32V8x16AllTrue) \ V(IA32Word32AtomicPairLoad) \ V(IA32Word32AtomicPairStore) \ V(IA32Word32AtomicPairAdd) \ diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index 52f0b0356ff..6d0062ba09e 100644 --- a/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -117,6 +117,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kIA32F64x2Le: case kIA32F64x2Pmin: case kIA32F64x2Pmax: + case kIA32F64x2Round: case kIA32I64x2SplatI32Pair: case kIA32I64x2ReplaceLaneI32Pair: case kIA32I64x2Neg: @@ -167,6 +168,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXF32x4Le: case kIA32F32x4Pmin: case kIA32F32x4Pmax: + case kIA32F32x4Round: case kIA32I32x4Splat: case kIA32I32x4ExtractLane: case kSSEI32x4ReplaceLane: @@ -213,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXI32x4GeU: case kIA32I32x4Abs: case kIA32I32x4BitMask: + case kIA32I32x4DotI16x8S: case kIA32I16x8Splat: case kIA32I16x8ExtractLaneU: case kIA32I16x8ExtractLaneS: @@ 
-367,12 +370,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXS8x4Reverse: case kSSES8x2Reverse: case kAVXS8x2Reverse: - case kIA32S1x4AnyTrue: - case kIA32S1x4AllTrue: - case kIA32S1x8AnyTrue: - case kIA32S1x8AllTrue: - case kIA32S1x16AnyTrue: - case kIA32S1x16AllTrue: + case kIA32V32x4AnyTrue: + case kIA32V32x4AllTrue: + case kIA32V16x8AnyTrue: + case kIA32V16x8AllTrue: + case kIA32V8x16AnyTrue: + case kIA32V8x16AllTrue: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc index c50464f4b86..5ed7c24e6bf 100644 --- a/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node, } } +// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be +// a register as we don't have memory alignment yet. For AVX, memory operands +// are fine, but can have performance issues if not aligned to 16/32 bytes +// (based on load size), see SDM Vol 1, chapter 14.9 +void VisitRROSimd(InstructionSelector* selector, Node* node, + ArchOpcode avx_opcode, ArchOpcode sse_opcode) { + IA32OperandGenerator g(selector); + InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); + if (selector->IsSupported(AVX)) { + selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, + g.Use(node->InputAt(1))); + } else { + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, + g.UseRegister(node->InputAt(1))); + } +} + void VisitRRISimd(InstructionSelector* selector, Node* node, ArchOpcode opcode) { IA32OperandGenerator g(selector); @@ -941,7 +958,16 @@ void InstructionSelector::VisitWord32Ror(Node* node) { V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \ V(Float32RoundTiesEven, \ kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \ - V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest)) + V(Float64RoundTiesEven, \ + kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \ + V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \ + V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \ + V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \ + V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \ + V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \ + V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \ + V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \ + V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest)) #define RRO_FLOAT_OP_LIST(V) \ V(Float32Add, kAVXFloat32Add, kSSEFloat32Add) \ @@ -2100,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { #define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \ V(I64x2Add) \ V(I64x2Sub) \ + V(I32x4DotI16x8S) \ V(I16x8RoundingAverageU) \ V(I8x16RoundingAverageU) @@ -2131,14 +2158,14 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(S128Not) #define SIMD_ANYTRUE_LIST(V) \ - V(S1x4AnyTrue) \ - V(S1x8AnyTrue) \ - V(S1x16AnyTrue) + V(V32x4AnyTrue) \ + V(V16x8AnyTrue) \ + V(V8x16AnyTrue) #define SIMD_ALLTRUE_LIST(V) \ - V(S1x4AllTrue) \ - V(S1x8AllTrue) \ - V(S1x16AllTrue) + V(V32x4AllTrue) \ + V(V16x8AllTrue) \ + V(V8x16AllTrue) #define 
SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \ V(I64x2Shl) \ @@ -2372,10 +2399,15 @@ SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX) #undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX #undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX -#define VISIT_SIMD_UNOP(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - IA32OperandGenerator g(this); \ - Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \ +// TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory +// alignment yet. For AVX, memory operands are fine, but can have performance +// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1, +// chapter 14.9 +#define VISIT_SIMD_UNOP(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + IA32OperandGenerator g(this); \ + Emit(kIA32##Opcode, g.DefineAsRegister(node), \ + g.UseRegister(node->InputAt(0))); \ } SIMD_UNOP_LIST(VISIT_SIMD_UNOP) #undef VISIT_SIMD_UNOP @@ -2407,23 +2439,23 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) IA32OperandGenerator g(this); \ InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ Emit(kIA32##Opcode, g.DefineAsRegister(node), \ - g.UseUnique(node->InputAt(0)), arraysize(temps), temps); \ + g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ } SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) #undef VISIT_SIMD_ALLTRUE #undef SIMD_ALLTRUE_LIST -#define VISIT_SIMD_BINOP(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \ +#define VISIT_SIMD_BINOP(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \ } SIMD_BINOP_LIST(VISIT_SIMD_BINOP) #undef VISIT_SIMD_BINOP #undef SIMD_BINOP_LIST -#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \ +#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \ } SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX) #undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX diff --git a/chromium/v8/src/compiler/backend/instruction-selector-impl.h b/chromium/v8/src/compiler/backend/instruction-selector-impl.h index aa7da85e42b..7e1f183fb71 100644 --- a/chromium/v8/src/compiler/backend/instruction-selector-impl.h +++ b/chromium/v8/src/compiler/backend/instruction-selector-impl.h @@ -356,6 +356,8 @@ class OperandGenerator { case MachineRepresentation::kCompressed: case MachineRepresentation::kCompressedPointer: return Constant(static_cast<int32_t>(0)); + case MachineRepresentation::kWord64: + return Constant(static_cast<int64_t>(0)); case MachineRepresentation::kFloat64: return Constant(static_cast<double>(0)); case MachineRepresentation::kFloat32: diff --git a/chromium/v8/src/compiler/backend/instruction-selector.cc b/chromium/v8/src/compiler/backend/instruction-selector.cc index c2022b574ee..8ad88b946b4 100644 --- a/chromium/v8/src/compiler/backend/instruction-selector.cc +++ b/chromium/v8/src/compiler/backend/instruction-selector.cc @@ -1043,7 +1043,8 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer, InstructionOperand op = g.UseLocation(*iter, location); UnallocatedOperand unallocated = UnallocatedOperand::cast(op); if (unallocated.HasFixedSlotPolicy() && !call_tail) { - int stack_index = -unallocated.fixed_slot_index() - 1; + int 
stack_index = buffer->descriptor->GetStackIndexFromSlot( + unallocated.fixed_slot_index()); // This can insert empty slots before stack_index and will insert enough // slots after stack_index to store the parameter. if (static_cast<size_t>(stack_index) >= buffer->pushed_nodes.size()) { @@ -1888,6 +1889,14 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF64x2Pmin(node); case IrOpcode::kF64x2Pmax: return MarkAsSimd128(node), VisitF64x2Pmax(node); + case IrOpcode::kF64x2Ceil: + return MarkAsSimd128(node), VisitF64x2Ceil(node); + case IrOpcode::kF64x2Floor: + return MarkAsSimd128(node), VisitF64x2Floor(node); + case IrOpcode::kF64x2Trunc: + return MarkAsSimd128(node), VisitF64x2Trunc(node); + case IrOpcode::kF64x2NearestInt: + return MarkAsSimd128(node), VisitF64x2NearestInt(node); case IrOpcode::kF32x4Splat: return MarkAsSimd128(node), VisitF32x4Splat(node); case IrOpcode::kF32x4ExtractLane: @@ -1938,6 +1947,14 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF32x4Pmin(node); case IrOpcode::kF32x4Pmax: return MarkAsSimd128(node), VisitF32x4Pmax(node); + case IrOpcode::kF32x4Ceil: + return MarkAsSimd128(node), VisitF32x4Ceil(node); + case IrOpcode::kF32x4Floor: + return MarkAsSimd128(node), VisitF32x4Floor(node); + case IrOpcode::kF32x4Trunc: + return MarkAsSimd128(node), VisitF32x4Trunc(node); + case IrOpcode::kF32x4NearestInt: + return MarkAsSimd128(node), VisitF32x4NearestInt(node); case IrOpcode::kI64x2Splat: return MarkAsSimd128(node), VisitI64x2Splat(node); case IrOpcode::kI64x2SplatI32Pair: @@ -2040,6 +2057,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI32x4Abs(node); case IrOpcode::kI32x4BitMask: return MarkAsWord32(node), VisitI32x4BitMask(node); + case IrOpcode::kI32x4DotI16x8S: + return MarkAsSimd128(node), VisitI32x4DotI16x8S(node); case IrOpcode::kI16x8Splat: return MarkAsSimd128(node), VisitI16x8Splat(node); case IrOpcode::kI16x8ExtractLaneU: @@ -2188,22 +2207,22 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitS8x16Swizzle(node); case IrOpcode::kS8x16Shuffle: return MarkAsSimd128(node), VisitS8x16Shuffle(node); - case IrOpcode::kS1x2AnyTrue: - return MarkAsWord32(node), VisitS1x2AnyTrue(node); - case IrOpcode::kS1x2AllTrue: - return MarkAsWord32(node), VisitS1x2AllTrue(node); - case IrOpcode::kS1x4AnyTrue: - return MarkAsWord32(node), VisitS1x4AnyTrue(node); - case IrOpcode::kS1x4AllTrue: - return MarkAsWord32(node), VisitS1x4AllTrue(node); - case IrOpcode::kS1x8AnyTrue: - return MarkAsWord32(node), VisitS1x8AnyTrue(node); - case IrOpcode::kS1x8AllTrue: - return MarkAsWord32(node), VisitS1x8AllTrue(node); - case IrOpcode::kS1x16AnyTrue: - return MarkAsWord32(node), VisitS1x16AnyTrue(node); - case IrOpcode::kS1x16AllTrue: - return MarkAsWord32(node), VisitS1x16AllTrue(node); + case IrOpcode::kV64x2AnyTrue: + return MarkAsWord32(node), VisitV64x2AnyTrue(node); + case IrOpcode::kV64x2AllTrue: + return MarkAsWord32(node), VisitV64x2AllTrue(node); + case IrOpcode::kV32x4AnyTrue: + return MarkAsWord32(node), VisitV32x4AnyTrue(node); + case IrOpcode::kV32x4AllTrue: + return MarkAsWord32(node), VisitV32x4AllTrue(node); + case IrOpcode::kV16x8AnyTrue: + return MarkAsWord32(node), VisitV16x8AnyTrue(node); + case IrOpcode::kV16x8AllTrue: + return MarkAsWord32(node), VisitV16x8AllTrue(node); + case IrOpcode::kV8x16AnyTrue: + return MarkAsWord32(node), VisitV8x16AnyTrue(node); + case IrOpcode::kV8x16AllTrue: + return MarkAsWord32(node), 
VisitV8x16AllTrue(node); default: FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(), node->op()->mnemonic(), node->id()); @@ -2638,8 +2657,8 @@ void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV64x2AnyTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV64x2AllTrue(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); } @@ -2651,23 +2670,45 @@ void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X +#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \ + !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_MIPS && \ + !V8_TARGET_ARCH_MIPS64 // TODO(v8:10308) Bitmask operations are in prototype now, we can remove these // guards when they go into the proposal. -#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \ - !V8_TARGET_ARCH_X64 void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); } -#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 - // && !V8_TARGET_ARCH_X64 - // TODO(v8:10501) Prototyping pmin and pmax instructions. -#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); } -#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 +#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 + // && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && + // !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 + +#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X && \ + !V8_TARGET_ARCH_IA32 +// TODO(v8:10553) Prototyping floating point rounding instructions. +// TODO(zhin): Temporary convoluted way to for unimplemented opcodes on ARM as +// we are implementing them one at a time. 
+#if !V8_TARGET_ARCH_ARM +void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_ARM +void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X + // && !V8_TARGET_ARCH_IA32 + +#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 +// TODO(v8:10583) Prototype i32x4.dot_i16x8_s +void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64 void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } @@ -2808,7 +2849,7 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) { switch (call_descriptor->kind()) { case CallDescriptor::kCallAddress: { int misc_field = static_cast<int>(call_descriptor->ParameterCount()); -#if defined(_AIX) +#if ABI_USES_FUNCTION_DESCRIPTORS // Highest misc_field bit is used on AIX to indicate if a CFunction call // has function descriptor or not. if (!call_descriptor->NoFunctionDescriptor()) { @@ -3038,7 +3079,7 @@ void InstructionSelector::VisitUnreachable(Node* node) { void InstructionSelector::VisitStaticAssert(Node* node) { Node* asserted = node->InputAt(0); - asserted->Print(2); + asserted->Print(4); FATAL("Expected turbofan static assert to hold, but got non-true input!\n"); } diff --git a/chromium/v8/src/compiler/backend/instruction.h b/chromium/v8/src/compiler/backend/instruction.h index e189100c346..f40a4198f81 100644 --- a/chromium/v8/src/compiler/backend/instruction.h +++ b/chromium/v8/src/compiler/backend/instruction.h @@ -1536,7 +1536,7 @@ class V8_EXPORT_PRIVATE InstructionSequence final return virtual_register; } Constant GetConstant(int virtual_register) const { - ConstantMap::const_iterator it = constants_.find(virtual_register); + auto it = constants_.find(virtual_register); DCHECK(it != constants_.end()); DCHECK_EQ(virtual_register, it->first); return it->second; diff --git a/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc b/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc index c83a4e28ee1..b9c1eb11d92 100644 --- a/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc +++ b/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc @@ -2159,6 +2159,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ insert_w(dst, i.InputInt8(1) * 2 + 1, kScratchReg); break; } + case kMipsF64x2Pmin: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = rhs < lhs ? rhs : lhs + __ fclt_d(dst, rhs, lhs); + __ bsel_v(dst, lhs, rhs); + break; + } + case kMipsF64x2Pmax: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = lhs < rhs ? 
rhs : lhs + __ fclt_d(dst, lhs, rhs); + __ bsel_v(dst, lhs, rhs); + break; + } case kMipsI64x2Add: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ addv_d(i.OutputSimd128Register(), i.InputSimd128Register(0), @@ -2395,6 +2415,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kMipsF32x4Pmin: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = rhs < lhs ? rhs : lhs + __ fclt_w(dst, rhs, lhs); + __ bsel_v(dst, lhs, rhs); + break; + } + case kMipsF32x4Pmax: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = lhs < rhs ? rhs : lhs + __ fclt_w(dst, lhs, rhs); + __ bsel_v(dst, lhs, rhs); + break; + } case kMipsI32x4SConvertF32x4: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); @@ -2442,6 +2482,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( kSimd128RegZero); break; } + case kMipsI32x4BitMask: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Register dst = i.OutputRegister(); + Simd128Register src = i.InputSimd128Register(0); + Simd128Register scratch0 = kSimd128RegZero; + Simd128Register scratch1 = kSimd128ScratchReg; + __ srli_w(scratch0, src, 31); + __ srli_d(scratch1, scratch0, 31); + __ or_v(scratch0, scratch0, scratch1); + __ shf_w(scratch1, scratch0, 0x0E); + __ slli_d(scratch1, scratch1, 2); + __ or_v(scratch0, scratch0, scratch1); + __ copy_u_b(dst, scratch0, 0); + break; + } case kMipsI16x8Splat: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ fill_h(i.OutputSimd128Register(), i.InputRegister(0)); @@ -2609,6 +2664,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( kSimd128RegZero); break; } + case kMipsI16x8BitMask: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Register dst = i.OutputRegister(); + Simd128Register src = i.InputSimd128Register(0); + Simd128Register scratch0 = kSimd128RegZero; + Simd128Register scratch1 = kSimd128ScratchReg; + __ srli_h(scratch0, src, 15); + __ srli_w(scratch1, scratch0, 15); + __ or_v(scratch0, scratch0, scratch1); + __ srli_d(scratch1, scratch0, 30); + __ or_v(scratch0, scratch0, scratch1); + __ shf_w(scratch1, scratch0, 0x0E); + __ slli_d(scratch1, scratch1, 4); + __ or_v(scratch0, scratch0, scratch1); + __ copy_u_b(dst, scratch0, 0); + break; + } case kMipsI8x16Splat: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ fill_b(i.OutputSimd128Register(), i.InputRegister(0)); @@ -2776,6 +2848,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( kSimd128RegZero); break; } + case kMipsI8x16BitMask: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Register dst = i.OutputRegister(); + Simd128Register src = i.InputSimd128Register(0); + Simd128Register scratch0 = kSimd128RegZero; + Simd128Register scratch1 = kSimd128ScratchReg; + __ srli_b(scratch0, src, 7); + __ srli_h(scratch1, scratch0, 7); + __ or_v(scratch0, scratch0, scratch1); + __ srli_w(scratch1, scratch0, 14); + __ or_v(scratch0, scratch0, scratch1); + __ srli_d(scratch1, scratch0, 28); + __ or_v(scratch0, scratch0, scratch1); + __ shf_w(scratch1, scratch0, 0x0E); + __ ilvev_b(scratch0, scratch1, scratch0); + __ copy_u_h(dst, scratch0, 0); + break; + } case 
kMipsS128And: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0), @@ -2800,9 +2890,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(0)); break; } - case kMipsS1x4AnyTrue: - case kMipsS1x8AnyTrue: - case kMipsS1x16AnyTrue: { + case kMipsV32x4AnyTrue: + case kMipsV16x8AnyTrue: + case kMipsV8x16AnyTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_false; @@ -2814,7 +2904,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bind(&all_false); break; } - case kMipsS1x4AllTrue: { + case kMipsV32x4AllTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_true; @@ -2825,7 +2915,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bind(&all_true); break; } - case kMipsS1x8AllTrue: { + case kMipsV16x8AllTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_true; @@ -2836,7 +2926,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bind(&all_true); break; } - case kMipsS1x16AllTrue: { + case kMipsV8x16AllTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_true; diff --git a/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h b/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h index 0a37dd70683..27418935dd3 100644 --- a/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h +++ b/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h @@ -155,6 +155,8 @@ namespace compiler { V(MipsF64x2Ne) \ V(MipsF64x2Lt) \ V(MipsF64x2Le) \ + V(MipsF64x2Pmin) \ + V(MipsF64x2Pmax) \ V(MipsI64x2Add) \ V(MipsI64x2Sub) \ V(MipsI64x2Mul) \ @@ -196,6 +198,8 @@ namespace compiler { V(MipsF32x4Ne) \ V(MipsF32x4Lt) \ V(MipsF32x4Le) \ + V(MipsF32x4Pmin) \ + V(MipsF32x4Pmax) \ V(MipsI32x4SConvertF32x4) \ V(MipsI32x4UConvertF32x4) \ V(MipsI32x4Neg) \ @@ -204,6 +208,7 @@ namespace compiler { V(MipsI32x4GtU) \ V(MipsI32x4GeU) \ V(MipsI32x4Abs) \ + V(MipsI32x4BitMask) \ V(MipsI16x8Splat) \ V(MipsI16x8ExtractLaneU) \ V(MipsI16x8ExtractLaneS) \ @@ -232,6 +237,7 @@ namespace compiler { V(MipsI16x8GeU) \ V(MipsI16x8RoundingAverageU) \ V(MipsI16x8Abs) \ + V(MipsI16x8BitMask) \ V(MipsI8x16Splat) \ V(MipsI8x16ExtractLaneU) \ V(MipsI8x16ExtractLaneS) \ @@ -259,18 +265,19 @@ namespace compiler { V(MipsI8x16GeU) \ V(MipsI8x16RoundingAverageU) \ V(MipsI8x16Abs) \ + V(MipsI8x16BitMask) \ V(MipsS128And) \ V(MipsS128Or) \ V(MipsS128Xor) \ V(MipsS128Not) \ V(MipsS128Select) \ V(MipsS128AndNot) \ - V(MipsS1x4AnyTrue) \ - V(MipsS1x4AllTrue) \ - V(MipsS1x8AnyTrue) \ - V(MipsS1x8AllTrue) \ - V(MipsS1x16AnyTrue) \ - V(MipsS1x16AllTrue) \ + V(MipsV32x4AnyTrue) \ + V(MipsV32x4AllTrue) \ + V(MipsV16x8AnyTrue) \ + V(MipsV16x8AllTrue) \ + V(MipsV8x16AnyTrue) \ + V(MipsV8x16AllTrue) \ V(MipsS32x4InterleaveRight) \ V(MipsS32x4InterleaveLeft) \ V(MipsS32x4PackEven) \ diff --git a/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc b/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc index 81bbfbbfb9b..5180a1d4ed0 100644 --- a/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc +++ b/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc @@ -57,6 +57,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsF64x2Splat: case kMipsF64x2ExtractLane: case kMipsF64x2ReplaceLane: + case kMipsF64x2Pmin: 
+ case kMipsF64x2Pmax: case kMipsI64x2Add: case kMipsI64x2Sub: case kMipsI64x2Mul: @@ -85,6 +87,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsF32x4Splat: case kMipsF32x4Sub: case kMipsF32x4UConvertI32x4: + case kMipsF32x4Pmin: + case kMipsF32x4Pmax: case kMipsFloat32Max: case kMipsFloat32Min: case kMipsFloat32RoundDown: @@ -138,6 +142,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsI16x8UConvertI8x16High: case kMipsI16x8UConvertI8x16Low: case kMipsI16x8Abs: + case kMipsI16x8BitMask: case kMipsI32x4Add: case kMipsI32x4AddHoriz: case kMipsI32x4Eq: @@ -166,6 +171,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsI32x4UConvertI16x8High: case kMipsI32x4UConvertI16x8Low: case kMipsI32x4Abs: + case kMipsI32x4BitMask: case kMipsI8x16Add: case kMipsI8x16AddSaturateS: case kMipsI8x16AddSaturateU: @@ -195,6 +201,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsI8x16SubSaturateU: case kMipsI8x16UConvertI16x8: case kMipsI8x16Abs: + case kMipsI8x16BitMask: case kMipsIns: case kMipsLsa: case kMipsMaddD: @@ -238,12 +245,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsS16x8InterleaveRight: case kMipsS16x8PackEven: case kMipsS16x8PackOdd: - case kMipsS1x16AllTrue: - case kMipsS1x16AnyTrue: - case kMipsS1x4AllTrue: - case kMipsS1x4AnyTrue: - case kMipsS1x8AllTrue: - case kMipsS1x8AnyTrue: + case kMipsV8x16AllTrue: + case kMipsV8x16AnyTrue: + case kMipsV32x4AllTrue: + case kMipsV32x4AnyTrue: + case kMipsV16x8AllTrue: + case kMipsV16x8AnyTrue: case kMipsS32x4InterleaveEven: case kMipsS32x4InterleaveLeft: case kMipsS32x4InterleaveOdd: diff --git a/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc b/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc index dac94fae272..2785186b827 100644 --- a/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc +++ b/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc @@ -113,6 +113,14 @@ static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, g.UseRegister(node->InputAt(1))); } +static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + MipsOperandGenerator g(selector); + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); +} + void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { MipsOperandGenerator g(selector); selector->Emit( @@ -2111,12 +2119,12 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I16x8UConvertI8x16High, kMipsI16x8UConvertI8x16High) \ V(I8x16Neg, kMipsI8x16Neg) \ V(S128Not, kMipsS128Not) \ - V(S1x4AnyTrue, kMipsS1x4AnyTrue) \ - V(S1x4AllTrue, kMipsS1x4AllTrue) \ - V(S1x8AnyTrue, kMipsS1x8AnyTrue) \ - V(S1x8AllTrue, kMipsS1x8AllTrue) \ - V(S1x16AnyTrue, kMipsS1x16AnyTrue) \ - V(S1x16AllTrue, kMipsS1x16AllTrue) + V(V32x4AnyTrue, kMipsV32x4AnyTrue) \ + V(V32x4AllTrue, kMipsV32x4AllTrue) \ + V(V16x8AnyTrue, kMipsV16x8AnyTrue) \ + V(V16x8AllTrue, kMipsV16x8AllTrue) \ + V(V8x16AnyTrue, kMipsV8x16AnyTrue) \ + V(V8x16AllTrue, kMipsV8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ V(I64x2Shl) \ @@ -2172,6 +2180,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I32x4GtU, kMipsI32x4GtU) \ V(I32x4GeU, kMipsI32x4GeU) \ V(I32x4Abs, kMipsI32x4Abs) \ + V(I32x4BitMask, kMipsI32x4BitMask) \ V(I16x8Add, kMipsI16x8Add) \ V(I16x8AddSaturateS, kMipsI16x8AddSaturateS) \ V(I16x8AddSaturateU, kMipsI16x8AddSaturateU) \ @@ -2194,6 
+2203,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \ V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \ V(I16x8Abs, kMipsI16x8Abs) \ + V(I16x8BitMask, kMipsI16x8BitMask) \ V(I8x16Add, kMipsI8x16Add) \ V(I8x16AddSaturateS, kMipsI8x16AddSaturateS) \ V(I8x16AddSaturateU, kMipsI8x16AddSaturateU) \ @@ -2215,6 +2225,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8) \ V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8) \ V(I8x16Abs, kMipsI8x16Abs) \ + V(I8x16BitMask, kMipsI8x16BitMask) \ V(S128And, kMipsS128And) \ V(S128Or, kMipsS128Or) \ V(S128Xor, kMipsS128Xor) \ @@ -2406,6 +2417,22 @@ void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) { Emit(kMipsSeh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitF32x4Pmin(Node* node) { + VisitUniqueRRR(this, kMipsF32x4Pmin, node); +} + +void InstructionSelector::VisitF32x4Pmax(Node* node) { + VisitUniqueRRR(this, kMipsF32x4Pmax, node); +} + +void InstructionSelector::VisitF64x2Pmin(Node* node) { + VisitUniqueRRR(this, kMipsF64x2Pmin, node); +} + +void InstructionSelector::VisitF64x2Pmax(Node* node) { + VisitUniqueRRR(this, kMipsF64x2Pmax, node); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc b/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc index 197167c01cd..9acd6459de5 100644 --- a/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc +++ b/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc @@ -2265,6 +2265,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputInt8(1)); break; } + case kMips64F64x2Pmin: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = rhs < lhs ? rhs : lhs + __ fclt_d(dst, rhs, lhs); + __ bsel_v(dst, lhs, rhs); + break; + } + case kMips64F64x2Pmax: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = lhs < rhs ? rhs : lhs + __ fclt_d(dst, lhs, rhs); + __ bsel_v(dst, lhs, rhs); + break; + } case kMips64I64x2ReplaceLane: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Simd128Register src = i.InputSimd128Register(0); @@ -2581,6 +2601,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kMips64F32x4Pmin: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = rhs < lhs ? rhs : lhs + __ fclt_w(dst, rhs, lhs); + __ bsel_v(dst, lhs, rhs); + break; + } + case kMips64F32x4Pmax: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register lhs = i.InputSimd128Register(0); + Simd128Register rhs = i.InputSimd128Register(1); + // dst = lhs < rhs ? 
rhs : lhs + __ fclt_w(dst, lhs, rhs); + __ bsel_v(dst, lhs, rhs); + break; + } case kMips64I32x4SConvertF32x4: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); @@ -2634,6 +2674,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( kSimd128RegZero); break; } + case kMips64I32x4BitMask: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Register dst = i.OutputRegister(); + Simd128Register src = i.InputSimd128Register(0); + Simd128Register scratch0 = kSimd128RegZero; + Simd128Register scratch1 = kSimd128ScratchReg; + __ srli_w(scratch0, src, 31); + __ srli_d(scratch1, scratch0, 31); + __ or_v(scratch0, scratch0, scratch1); + __ shf_w(scratch1, scratch0, 0x0E); + __ slli_d(scratch1, scratch1, 2); + __ or_v(scratch0, scratch0, scratch1); + __ copy_u_b(dst, scratch0, 0); + break; + } case kMips64I16x8Splat: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ fill_h(i.OutputSimd128Register(), i.InputRegister(0)); @@ -2820,6 +2875,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( kSimd128RegZero); break; } + case kMips64I16x8BitMask: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Register dst = i.OutputRegister(); + Simd128Register src = i.InputSimd128Register(0); + Simd128Register scratch0 = kSimd128RegZero; + Simd128Register scratch1 = kSimd128ScratchReg; + __ srli_h(scratch0, src, 15); + __ srli_w(scratch1, scratch0, 15); + __ or_v(scratch0, scratch0, scratch1); + __ srli_d(scratch1, scratch0, 30); + __ or_v(scratch0, scratch0, scratch1); + __ shf_w(scratch1, scratch0, 0x0E); + __ slli_d(scratch1, scratch1, 4); + __ or_v(scratch0, scratch0, scratch1); + __ copy_u_b(dst, scratch0, 0); + break; + } case kMips64I8x16Splat: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ fill_b(i.OutputSimd128Register(), i.InputRegister(0)); @@ -3006,6 +3078,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( kSimd128RegZero); break; } + case kMips64I8x16BitMask: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + Register dst = i.OutputRegister(); + Simd128Register src = i.InputSimd128Register(0); + Simd128Register scratch0 = kSimd128RegZero; + Simd128Register scratch1 = kSimd128ScratchReg; + __ srli_b(scratch0, src, 7); + __ srli_h(scratch1, scratch0, 7); + __ or_v(scratch0, scratch0, scratch1); + __ srli_w(scratch1, scratch0, 14); + __ or_v(scratch0, scratch0, scratch1); + __ srli_d(scratch1, scratch0, 28); + __ or_v(scratch0, scratch0, scratch1); + __ shf_w(scratch1, scratch0, 0x0E); + __ ilvev_b(scratch0, scratch1, scratch0); + __ copy_u_h(dst, scratch0, 0); + break; + } case kMips64S128And: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0), @@ -3030,9 +3120,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(0)); break; } - case kMips64S1x4AnyTrue: - case kMips64S1x8AnyTrue: - case kMips64S1x16AnyTrue: { + case kMips64V32x4AnyTrue: + case kMips64V16x8AnyTrue: + case kMips64V8x16AnyTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_false; @@ -3043,7 +3133,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bind(&all_false); break; } - case kMips64S1x4AllTrue: { + case kMips64V32x4AllTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_true; @@ -3054,7 +3144,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ 
bind(&all_true); break; } - case kMips64S1x8AllTrue: { + case kMips64V16x8AllTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_true; @@ -3065,7 +3155,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bind(&all_true); break; } - case kMips64S1x16AllTrue: { + case kMips64V8x16AllTrue: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); Register dst = i.OutputRegister(); Label all_true; diff --git a/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h b/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h index 9303b4572f3..0c42c059ea5 100644 --- a/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h +++ b/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h @@ -203,6 +203,8 @@ namespace compiler { V(Mips64F64x2Splat) \ V(Mips64F64x2ExtractLane) \ V(Mips64F64x2ReplaceLane) \ + V(Mips64F64x2Pmin) \ + V(Mips64F64x2Pmax) \ V(Mips64I64x2Splat) \ V(Mips64I64x2ExtractLane) \ V(Mips64I64x2ReplaceLane) \ @@ -229,6 +231,8 @@ namespace compiler { V(Mips64F32x4Ne) \ V(Mips64F32x4Lt) \ V(Mips64F32x4Le) \ + V(Mips64F32x4Pmin) \ + V(Mips64F32x4Pmax) \ V(Mips64I32x4SConvertF32x4) \ V(Mips64I32x4UConvertF32x4) \ V(Mips64I32x4Neg) \ @@ -237,6 +241,7 @@ namespace compiler { V(Mips64I32x4GtU) \ V(Mips64I32x4GeU) \ V(Mips64I32x4Abs) \ + V(Mips64I32x4BitMask) \ V(Mips64I16x8Splat) \ V(Mips64I16x8ExtractLaneU) \ V(Mips64I16x8ExtractLaneS) \ @@ -265,6 +270,7 @@ namespace compiler { V(Mips64I16x8GeU) \ V(Mips64I16x8RoundingAverageU) \ V(Mips64I16x8Abs) \ + V(Mips64I16x8BitMask) \ V(Mips64I8x16Splat) \ V(Mips64I8x16ExtractLaneU) \ V(Mips64I8x16ExtractLaneS) \ @@ -292,18 +298,19 @@ namespace compiler { V(Mips64I8x16GeU) \ V(Mips64I8x16RoundingAverageU) \ V(Mips64I8x16Abs) \ + V(Mips64I8x16BitMask) \ V(Mips64S128And) \ V(Mips64S128Or) \ V(Mips64S128Xor) \ V(Mips64S128Not) \ V(Mips64S128Select) \ V(Mips64S128AndNot) \ - V(Mips64S1x4AnyTrue) \ - V(Mips64S1x4AllTrue) \ - V(Mips64S1x8AnyTrue) \ - V(Mips64S1x8AllTrue) \ - V(Mips64S1x16AnyTrue) \ - V(Mips64S1x16AllTrue) \ + V(Mips64V32x4AnyTrue) \ + V(Mips64V32x4AllTrue) \ + V(Mips64V16x8AnyTrue) \ + V(Mips64V16x8AllTrue) \ + V(Mips64V8x16AnyTrue) \ + V(Mips64V8x16AllTrue) \ V(Mips64S32x4InterleaveRight) \ V(Mips64S32x4InterleaveLeft) \ V(Mips64S32x4PackEven) \ diff --git a/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc index 81fc3b2ca9a..2f8a2722015 100644 --- a/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc +++ b/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc @@ -82,6 +82,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64F64x2Ne: case kMips64F64x2Lt: case kMips64F64x2Le: + case kMips64F64x2Pmin: + case kMips64F64x2Pmax: case kMips64I64x2Splat: case kMips64I64x2ExtractLane: case kMips64I64x2ReplaceLane: @@ -113,6 +115,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64F32x4Splat: case kMips64F32x4Sub: case kMips64F32x4UConvertI32x4: + case kMips64F32x4Pmin: + case kMips64F32x4Pmax: case kMips64F64x2Splat: case kMips64F64x2ExtractLane: case kMips64F64x2ReplaceLane: @@ -171,6 +175,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64I16x8UConvertI8x16Low: case kMips64I16x8RoundingAverageU: case kMips64I16x8Abs: + case kMips64I16x8BitMask: case kMips64I32x4Add: case kMips64I32x4AddHoriz: case kMips64I32x4Eq: @@ -199,6 +204,7 @@ int 
InstructionScheduler::GetTargetInstructionFlags( case kMips64I32x4UConvertI16x8High: case kMips64I32x4UConvertI16x8Low: case kMips64I32x4Abs: + case kMips64I32x4BitMask: case kMips64I8x16Add: case kMips64I8x16AddSaturateS: case kMips64I8x16AddSaturateU: @@ -226,6 +232,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64I8x16SubSaturateU: case kMips64I8x16RoundingAverageU: case kMips64I8x16Abs: + case kMips64I8x16BitMask: case kMips64Ins: case kMips64Lsa: case kMips64MaxD: @@ -265,12 +272,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64S16x8PackOdd: case kMips64S16x2Reverse: case kMips64S16x4Reverse: - case kMips64S1x16AllTrue: - case kMips64S1x16AnyTrue: - case kMips64S1x4AllTrue: - case kMips64S1x4AnyTrue: - case kMips64S1x8AllTrue: - case kMips64S1x8AnyTrue: + case kMips64V8x16AllTrue: + case kMips64V8x16AnyTrue: + case kMips64V32x4AllTrue: + case kMips64V32x4AnyTrue: + case kMips64V16x8AllTrue: + case kMips64V16x8AnyTrue: case kMips64S32x4InterleaveEven: case kMips64S32x4InterleaveOdd: case kMips64S32x4InterleaveLeft: diff --git a/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc b/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc index 719a916b6a5..2c9c8d439b6 100644 --- a/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc +++ b/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc @@ -163,6 +163,14 @@ static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, g.UseRegister(node->InputAt(1))); } +static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode, + Node* node) { + Mips64OperandGenerator g(selector); + selector->Emit(opcode, g.DefineAsRegister(node), + g.UseUniqueRegister(node->InputAt(0)), + g.UseUniqueRegister(node->InputAt(1))); +} + void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) { Mips64OperandGenerator g(selector); selector->Emit( @@ -2778,21 +2786,24 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I32x4UConvertI16x8Low, kMips64I32x4UConvertI16x8Low) \ V(I32x4UConvertI16x8High, kMips64I32x4UConvertI16x8High) \ V(I32x4Abs, kMips64I32x4Abs) \ + V(I32x4BitMask, kMips64I32x4BitMask) \ V(I16x8Neg, kMips64I16x8Neg) \ V(I16x8SConvertI8x16Low, kMips64I16x8SConvertI8x16Low) \ V(I16x8SConvertI8x16High, kMips64I16x8SConvertI8x16High) \ V(I16x8UConvertI8x16Low, kMips64I16x8UConvertI8x16Low) \ V(I16x8UConvertI8x16High, kMips64I16x8UConvertI8x16High) \ V(I16x8Abs, kMips64I16x8Abs) \ + V(I16x8BitMask, kMips64I16x8BitMask) \ V(I8x16Neg, kMips64I8x16Neg) \ V(I8x16Abs, kMips64I8x16Abs) \ + V(I8x16BitMask, kMips64I8x16BitMask) \ V(S128Not, kMips64S128Not) \ - V(S1x4AnyTrue, kMips64S1x4AnyTrue) \ - V(S1x4AllTrue, kMips64S1x4AllTrue) \ - V(S1x8AnyTrue, kMips64S1x8AnyTrue) \ - V(S1x8AllTrue, kMips64S1x8AllTrue) \ - V(S1x16AnyTrue, kMips64S1x16AnyTrue) \ - V(S1x16AllTrue, kMips64S1x16AllTrue) + V(V32x4AnyTrue, kMips64V32x4AnyTrue) \ + V(V32x4AllTrue, kMips64V32x4AllTrue) \ + V(V16x8AnyTrue, kMips64V16x8AnyTrue) \ + V(V16x8AllTrue, kMips64V16x8AllTrue) \ + V(V8x16AnyTrue, kMips64V8x16AnyTrue) \ + V(V8x16AllTrue, kMips64V8x16AllTrue) #define SIMD_SHIFT_OP_LIST(V) \ V(I64x2Shl) \ @@ -3099,6 +3110,22 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) { g.TempImmediate(0)); } +void InstructionSelector::VisitF32x4Pmin(Node* node) { + VisitUniqueRRR(this, kMips64F32x4Pmin, node); +} + +void InstructionSelector::VisitF32x4Pmax(Node* node) { + VisitUniqueRRR(this, kMips64F32x4Pmax, 
node); +} + +void InstructionSelector::VisitF64x2Pmin(Node* node) { + VisitUniqueRRR(this, kMips64F64x2Pmin, node); +} + +void InstructionSelector::VisitF64x2Pmax(Node* node) { + VisitUniqueRRR(this, kMips64F64x2Pmax, node); +} + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc b/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc index b7fece3f72d..56c5003d2e8 100644 --- a/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc +++ b/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc @@ -1039,7 +1039,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( linkage()->GetIncomingDescriptor()->IsWasmCapiFunction(); int offset = (FLAG_enable_embedded_constant_pool ? 20 : 23) * kInstrSize; -#if defined(_AIX) +#if ABI_USES_FUNCTION_DESCRIPTORS // AIX/PPC64BE Linux uses a function descriptor int kNumParametersMask = kHasFunctionDescriptorBitMask - 1; num_parameters = kNumParametersMask & misc_field; @@ -2164,6 +2164,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); __ vsro(dst, dst, kScratchDoubleReg); // reload + __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ mtvsrd(kScratchDoubleReg, r0); __ vor(dst, dst, kScratchDoubleReg); break; @@ -2186,6 +2187,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); __ vsro(dst, dst, kScratchDoubleReg); // reload + __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ mtvsrd(kScratchDoubleReg, src); __ vor(dst, dst, kScratchDoubleReg); break; @@ -2208,46 +2210,709 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vspltb(dst, dst, Operand(7)); break; } +#define SHIFT_TO_CORRECT_LANE(starting_lane_nummber, lane_input, \ + lane_width_in_bytes, input_register) \ + int shift_bits = abs(lane_input - starting_lane_nummber) * \ + lane_width_in_bytes * kBitsPerByte; \ + if (shift_bits > 0) { \ + __ li(ip, Operand(shift_bits)); \ + __ mtvsrd(kScratchDoubleReg, ip); \ + __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \ + if (lane_input < starting_lane_nummber) { \ + __ vsro(kScratchDoubleReg, input_register, kScratchDoubleReg); \ + } else { \ + DCHECK(lane_input > starting_lane_nummber); \ + __ vslo(kScratchDoubleReg, input_register, kScratchDoubleReg); \ + } \ + input_register = kScratchDoubleReg; \ + } case kPPC_F64x2ExtractLane: { - __ mfvsrd(kScratchReg, i.InputSimd128Register(0)); + int32_t lane = 1 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(0, lane, 8, src); + __ mfvsrd(kScratchReg, src); __ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg); break; } case kPPC_F32x4ExtractLane: { - __ mfvsrwz(kScratchReg, i.InputSimd128Register(0)); + int32_t lane = 3 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(1, lane, 4, src) + __ mfvsrwz(kScratchReg, src); __ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg); break; } case kPPC_I64x2ExtractLane: { - __ mfvsrd(i.OutputRegister(), i.InputSimd128Register(0)); + int32_t lane = 1 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(0, lane, 8, src) + __ mfvsrd(i.OutputRegister(), src); break; } case kPPC_I32x4ExtractLane: { - __ mfvsrwz(i.OutputRegister(), i.InputSimd128Register(0)); + int32_t lane 
= 3 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(1, lane, 4, src) + __ mfvsrwz(i.OutputRegister(), src); break; } case kPPC_I16x8ExtractLaneU: { - __ mfvsrwz(r0, i.InputSimd128Register(0)); + int32_t lane = 7 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(2, lane, 2, src) + __ mfvsrwz(r0, src); __ li(ip, Operand(16)); __ srd(i.OutputRegister(), r0, ip); break; } case kPPC_I16x8ExtractLaneS: { - __ mfvsrwz(kScratchReg, i.InputSimd128Register(0)); + int32_t lane = 7 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(2, lane, 2, src) + __ mfvsrwz(kScratchReg, src); __ sradi(i.OutputRegister(), kScratchReg, 16); break; } case kPPC_I8x16ExtractLaneU: { - __ mfvsrwz(r0, i.InputSimd128Register(0)); + int32_t lane = 15 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(4, lane, 1, src) + __ mfvsrwz(r0, src); __ li(ip, Operand(24)); __ srd(i.OutputRegister(), r0, ip); break; } case kPPC_I8x16ExtractLaneS: { - __ mfvsrwz(kScratchReg, i.InputSimd128Register(0)); + int32_t lane = 15 - i.InputInt8(1); + Simd128Register src = i.InputSimd128Register(0); + SHIFT_TO_CORRECT_LANE(4, lane, 1, src) + __ mfvsrwz(kScratchReg, src); __ sradi(i.OutputRegister(), kScratchReg, 24); break; } +#undef SHIFT_TO_CORRECT_LANE +#define GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, \ + lane_width_in_bytes) \ + uint64_t mask = 0; \ + for (int i = 0, j = 0; i <= kSimd128Size - 1; i++) { \ + mask <<= kBitsPerByte; \ + if (i >= lane * lane_width_in_bytes && \ + i < lane * lane_width_in_bytes + lane_width_in_bytes) { \ + mask |= replacement_value_byte_lane + j; \ + j++; \ + } else { \ + mask |= i; \ + } \ + if (i == (kSimd128Size / 2) - 1) { \ + __ mov(r0, Operand(mask)); \ + mask = 0; \ + } else if (i >= kSimd128Size - 1) { \ + __ mov(ip, Operand(mask)); \ + } \ + } \ + /* Need to maintain 16 byte alignment for lvx */ \ + __ addi(sp, sp, Operand(-24)); \ + __ StoreP(ip, MemOperand(sp, 0)); \ + __ StoreP(r0, MemOperand(sp, 8)); \ + __ li(r0, Operand(0)); \ + __ lvx(kScratchDoubleReg, MemOperand(sp, r0)); \ + __ addi(sp, sp, Operand(24)); + case kPPC_F64x2ReplaceLane: { + Simd128Register src = i.InputSimd128Register(0); + Simd128Register dst = i.OutputSimd128Register(); + int32_t lane = 1 - i.InputInt8(1); + constexpr int replacement_value_byte_lane = 16; + constexpr int lane_width_in_bytes = 8; + GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, + lane_width_in_bytes) + __ MovDoubleToInt64(r0, i.InputDoubleRegister(2)); + __ mtvsrd(dst, r0); + __ vperm(dst, src, dst, kScratchDoubleReg); + break; + } + case kPPC_F32x4ReplaceLane: { + Simd128Register src = i.InputSimd128Register(0); + Simd128Register dst = i.OutputSimd128Register(); + int32_t lane = 3 - i.InputInt8(1); + constexpr int replacement_value_byte_lane = 20; + constexpr int lane_width_in_bytes = 4; + GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, + lane_width_in_bytes) + __ MovFloatToInt(kScratchReg, i.InputDoubleRegister(2)); + __ mtvsrd(dst, kScratchReg); + __ vperm(dst, src, dst, kScratchDoubleReg); + break; + } + case kPPC_I64x2ReplaceLane: { + Simd128Register src = i.InputSimd128Register(0); + Simd128Register dst = i.OutputSimd128Register(); + int32_t lane = 1 - i.InputInt8(1); + constexpr int replacement_value_byte_lane = 16; + constexpr int lane_width_in_bytes = 8; + GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, + 
lane_width_in_bytes) + __ mtvsrd(dst, i.InputRegister(2)); + __ vperm(dst, src, dst, kScratchDoubleReg); + break; + } + case kPPC_I32x4ReplaceLane: { + Simd128Register src = i.InputSimd128Register(0); + Simd128Register dst = i.OutputSimd128Register(); + int32_t lane = 3 - i.InputInt8(1); + constexpr int replacement_value_byte_lane = 20; + constexpr int lane_width_in_bytes = 4; + GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, + lane_width_in_bytes) + __ mtvsrd(dst, i.InputRegister(2)); + __ vperm(dst, src, dst, kScratchDoubleReg); + break; + } + case kPPC_I16x8ReplaceLane: { + Simd128Register src = i.InputSimd128Register(0); + Simd128Register dst = i.OutputSimd128Register(); + int32_t lane = 7 - i.InputInt8(1); + constexpr int replacement_value_byte_lane = 22; + constexpr int lane_width_in_bytes = 2; + GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, + lane_width_in_bytes) + __ mtvsrd(dst, i.InputRegister(2)); + __ vperm(dst, src, dst, kScratchDoubleReg); + break; + } + case kPPC_I8x16ReplaceLane: { + Simd128Register src = i.InputSimd128Register(0); + Simd128Register dst = i.OutputSimd128Register(); + int32_t lane = 15 - i.InputInt8(1); + constexpr int replacement_value_byte_lane = 23; + constexpr int lane_width_in_bytes = 1; + GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, + lane_width_in_bytes) + __ mtvsrd(dst, i.InputRegister(2)); + __ vperm(dst, src, dst, kScratchDoubleReg); + break; + } +#undef GENERATE_REPLACE_LANE_MASK + case kPPC_F64x2Add: { + __ xvadddp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F64x2Sub: { + __ xvsubdp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F64x2Mul: { + __ xvmuldp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F32x4Add: { + __ vaddfp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F32x4AddHoriz: { + Simd128Register src0 = i.InputSimd128Register(0); + Simd128Register src1 = i.InputSimd128Register(1); + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); + Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1)); + constexpr int shift_bits = 32; + // generate first operand + __ vpkudum(dst, src1, src0); + // generate second operand + __ li(ip, Operand(shift_bits)); + __ mtvsrd(tempFPReg2, ip); + __ vspltb(tempFPReg2, tempFPReg2, Operand(7)); + __ vsro(tempFPReg1, src0, tempFPReg2); + __ vsro(tempFPReg2, src1, tempFPReg2); + __ vpkudum(kScratchDoubleReg, tempFPReg2, tempFPReg1); + // add the operands + __ vaddfp(dst, kScratchDoubleReg, dst); + break; + } + case kPPC_F32x4Sub: { + __ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F32x4Mul: { + __ xvmulsp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2Add: { + __ vaddudm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2Sub: { + __ vsubudm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2Mul: { + // Need to maintain 16 byte alignment for stvx and lvx. 
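// What follows: both operands are spilled to the stack with stvx, each
// pair of 64-bit lanes is loaded into GPRs and multiplied with mulld,
// the products overwrite the first operand's slots, and the result
// vector is reloaded with lvx. (Assumption: no direct doubleword vector
// multiply is available here, so the multiply goes through scalar code.)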
+ __ addi(sp, sp, Operand(-40)); + __ li(r0, Operand(0)); + __ stvx(i.InputSimd128Register(0), MemOperand(sp, r0)); + __ li(r0, Operand(16)); + __ stvx(i.InputSimd128Register(1), MemOperand(sp, r0)); + for (int i = 0; i < 2; i++) { + __ LoadP(r0, MemOperand(sp, kBitsPerByte * i)); + __ LoadP(ip, MemOperand(sp, (kBitsPerByte * i) + kSimd128Size)); + __ mulld(r0, r0, ip); + __ StoreP(r0, MemOperand(sp, i * kBitsPerByte)); + } + __ li(r0, Operand(0)); + __ lvx(i.OutputSimd128Register(), MemOperand(sp, r0)); + __ addi(sp, sp, Operand(40)); + break; + } + case kPPC_I32x4Add: { + __ vadduwm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4AddHoriz: { + Simd128Register src0 = i.InputSimd128Register(0); + Simd128Register src1 = i.InputSimd128Register(1); + Simd128Register dst = i.OutputSimd128Register(); + __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vsum2sws(dst, src0, kScratchDoubleReg); + __ vsum2sws(kScratchDoubleReg, src1, kScratchDoubleReg); + __ vpkudum(dst, kScratchDoubleReg, dst); + break; + } + case kPPC_I32x4Sub: { + __ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4Mul: { + __ vmuluwm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8Add: { + __ vadduhm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8AddHoriz: { + Simd128Register src0 = i.InputSimd128Register(0); + Simd128Register src1 = i.InputSimd128Register(1); + Simd128Register dst = i.OutputSimd128Register(); + __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vsum4shs(dst, src0, kScratchDoubleReg); + __ vsum4shs(kScratchDoubleReg, src1, kScratchDoubleReg); + __ vpkuwus(dst, kScratchDoubleReg, dst); + break; + } + case kPPC_I16x8Sub: { + __ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8Mul: { + __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); + __ vmladduhm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1), kScratchDoubleReg); + break; + } + case kPPC_I8x16Add: { + __ vaddubm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16Sub: { + __ vsububm(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16Mul: { + __ vmuleub(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vmuloub(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vpkuhum(i.OutputSimd128Register(), kScratchDoubleReg, + i.OutputSimd128Register()); + break; + } + case kPPC_I64x2MinS: { + __ vminsd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4MinS: { + __ vminsw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2MinU: { + __ vminud(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4MinU: { + __ vminuw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8MinS: { + __ vminsh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8MinU: { + __ 
vminuh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16MinS: { + __ vminsb(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16MinU: { + __ vminub(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2MaxS: { + __ vmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4MaxS: { + __ vmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2MaxU: { + __ vmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4MaxU: { + __ vmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8MaxS: { + __ vmaxsh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8MaxU: { + __ vmaxuh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16MaxS: { + __ vmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16MaxU: { + __ vmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F64x2Eq: { + __ xvcmpeqdp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F64x2Ne: { + __ xvcmpeqdp(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg); + break; + } + case kPPC_F64x2Le: { + __ xvcmpgedp(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(0)); + break; + } + case kPPC_F64x2Lt: { + __ xvcmpgtdp(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(0)); + break; + } + case kPPC_F32x4Eq: { + __ xvcmpeqsp(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2Eq: { + __ vcmpequd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4Eq: { + __ vcmpequw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8Eq: { + __ vcmpequh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16Eq: { + __ vcmpequb(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_F32x4Ne: { + __ xvcmpeqsp(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg); + break; + } + case kPPC_I64x2Ne: { + __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg); + break; + } + case kPPC_I32x4Ne: { + __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg); + break; + } + case kPPC_I16x8Ne: { + __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg); + break; + } + case kPPC_I8x16Ne: { 
+ __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg); + break; + } + case kPPC_F32x4Lt: { + __ xvcmpgtsp(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(0)); + break; + } + case kPPC_F32x4Le: { + __ xvcmpgesp(i.OutputSimd128Register(), i.InputSimd128Register(1), + i.InputSimd128Register(0)); + break; + } + case kPPC_I64x2GtS: { + __ vcmpgtsd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4GtS: { + __ vcmpgtsw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I64x2GeS: { + __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtsd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + break; + } + case kPPC_I32x4GeS: { + __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtsw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + break; + } + case kPPC_I64x2GtU: { + __ vcmpgtud(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I32x4GtU: { + __ vcmpgtuw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + + break; + } + case kPPC_I64x2GeU: { + __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtud(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + + break; + } + case kPPC_I32x4GeU: { + __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtuw(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + break; + } + case kPPC_I16x8GtS: { + __ vcmpgtsh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8GeS: { + __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtsh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + break; + } + case kPPC_I16x8GtU: { + __ vcmpgtuh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I16x8GeU: { + __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtuh(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + break; + } + case kPPC_I8x16GtS: { + __ vcmpgtsb(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16GeS: { + __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtsb(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); 
+ break; + } + case kPPC_I8x16GtU: { + __ vcmpgtub(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } + case kPPC_I8x16GeU: { + __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vcmpgtub(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(), + kScratchDoubleReg); + break; + } +#define VECTOR_SHIFT(op) \ + { \ + __ mtvsrd(kScratchDoubleReg, i.InputRegister(1)); \ + __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \ + __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \ + kScratchDoubleReg); \ + } + case kPPC_I64x2Shl: { + VECTOR_SHIFT(vsld) + break; + } + case kPPC_I64x2ShrS: { + VECTOR_SHIFT(vsrad) + break; + } + case kPPC_I64x2ShrU: { + VECTOR_SHIFT(vsrd) + break; + } + case kPPC_I32x4Shl: { + VECTOR_SHIFT(vslw) + break; + } + case kPPC_I32x4ShrS: { + VECTOR_SHIFT(vsraw) + break; + } + case kPPC_I32x4ShrU: { + VECTOR_SHIFT(vsrw) + break; + } + case kPPC_I16x8Shl: { + VECTOR_SHIFT(vslh) + break; + } + case kPPC_I16x8ShrS: { + VECTOR_SHIFT(vsrah) + break; + } + case kPPC_I16x8ShrU: { + VECTOR_SHIFT(vsrh) + break; + } + case kPPC_I8x16Shl: { + VECTOR_SHIFT(vslb) + break; + } + case kPPC_I8x16ShrS: { + VECTOR_SHIFT(vsrab) + break; + } + case kPPC_I8x16ShrU: { + VECTOR_SHIFT(vsrb) + break; + } +#undef VECTOR_SHIFT + case kPPC_S128And: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register src = i.InputSimd128Register(1); + __ vand(dst, i.InputSimd128Register(0), src); + break; + } + case kPPC_S128Or: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register src = i.InputSimd128Register(1); + __ vor(dst, i.InputSimd128Register(0), src); + break; + } + case kPPC_S128Xor: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register src = i.InputSimd128Register(1); + __ vxor(dst, i.InputSimd128Register(0), src); + break; + } + case kPPC_S128Zero: { + Simd128Register dst = i.OutputSimd128Register(); + __ vxor(dst, dst, dst); + break; + } + case kPPC_S128Not: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register src = i.InputSimd128Register(1); + __ vnor(dst, i.InputSimd128Register(0), src); + break; + } + case kPPC_S128Select: { + Simd128Register dst = i.OutputSimd128Register(); + Simd128Register mask = i.InputSimd128Register(0); + Simd128Register src1 = i.InputSimd128Register(1); + Simd128Register src2 = i.InputSimd128Register(2); + __ vsel(dst, src2, src1, mask); + break; + } case kPPC_StoreCompressTagged: { ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX); break; diff --git a/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h b/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h index 4f6aeced6da..fdffc5f0963 100644 --- a/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h +++ b/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h @@ -192,18 +192,111 @@ namespace compiler { V(PPC_AtomicXorInt64) \ V(PPC_F64x2Splat) \ V(PPC_F64x2ExtractLane) \ + V(PPC_F64x2ReplaceLane) \ + V(PPC_F64x2Add) \ + V(PPC_F64x2Sub) \ + V(PPC_F64x2Mul) \ + V(PPC_F64x2Eq) \ + V(PPC_F64x2Ne) \ + V(PPC_F64x2Le) \ + V(PPC_F64x2Lt) \ V(PPC_F32x4Splat) \ V(PPC_F32x4ExtractLane) \ + V(PPC_F32x4ReplaceLane) \ + V(PPC_F32x4Add) \ + V(PPC_F32x4AddHoriz) \ + V(PPC_F32x4Sub) \ + V(PPC_F32x4Mul) \ + V(PPC_F32x4Eq) \ + V(PPC_F32x4Ne) \ + V(PPC_F32x4Lt) \ + V(PPC_F32x4Le) \ V(PPC_I64x2Splat) \ V(PPC_I64x2ExtractLane) \ + 
V(PPC_I64x2ReplaceLane) \ + V(PPC_I64x2Add) \ + V(PPC_I64x2Sub) \ + V(PPC_I64x2Mul) \ + V(PPC_I64x2MinS) \ + V(PPC_I64x2MinU) \ + V(PPC_I64x2MaxS) \ + V(PPC_I64x2MaxU) \ + V(PPC_I64x2Eq) \ + V(PPC_I64x2Ne) \ + V(PPC_I64x2GtS) \ + V(PPC_I64x2GtU) \ + V(PPC_I64x2GeU) \ + V(PPC_I64x2GeS) \ + V(PPC_I64x2Shl) \ + V(PPC_I64x2ShrS) \ + V(PPC_I64x2ShrU) \ V(PPC_I32x4Splat) \ V(PPC_I32x4ExtractLane) \ + V(PPC_I32x4ReplaceLane) \ + V(PPC_I32x4Add) \ + V(PPC_I32x4AddHoriz) \ + V(PPC_I32x4Sub) \ + V(PPC_I32x4Mul) \ + V(PPC_I32x4MinS) \ + V(PPC_I32x4MinU) \ + V(PPC_I32x4MaxS) \ + V(PPC_I32x4MaxU) \ + V(PPC_I32x4Eq) \ + V(PPC_I32x4Ne) \ + V(PPC_I32x4GtS) \ + V(PPC_I32x4GeS) \ + V(PPC_I32x4GtU) \ + V(PPC_I32x4GeU) \ + V(PPC_I32x4Shl) \ + V(PPC_I32x4ShrS) \ + V(PPC_I32x4ShrU) \ V(PPC_I16x8Splat) \ V(PPC_I16x8ExtractLaneU) \ V(PPC_I16x8ExtractLaneS) \ + V(PPC_I16x8ReplaceLane) \ + V(PPC_I16x8Add) \ + V(PPC_I16x8AddHoriz) \ + V(PPC_I16x8Sub) \ + V(PPC_I16x8Mul) \ + V(PPC_I16x8MinS) \ + V(PPC_I16x8MinU) \ + V(PPC_I16x8MaxS) \ + V(PPC_I16x8MaxU) \ + V(PPC_I16x8Eq) \ + V(PPC_I16x8Ne) \ + V(PPC_I16x8GtS) \ + V(PPC_I16x8GeS) \ + V(PPC_I16x8GtU) \ + V(PPC_I16x8GeU) \ + V(PPC_I16x8Shl) \ + V(PPC_I16x8ShrS) \ + V(PPC_I16x8ShrU) \ V(PPC_I8x16Splat) \ V(PPC_I8x16ExtractLaneU) \ V(PPC_I8x16ExtractLaneS) \ + V(PPC_I8x16ReplaceLane) \ + V(PPC_I8x16Add) \ + V(PPC_I8x16Sub) \ + V(PPC_I8x16Mul) \ + V(PPC_I8x16MinS) \ + V(PPC_I8x16MinU) \ + V(PPC_I8x16MaxS) \ + V(PPC_I8x16MaxU) \ + V(PPC_I8x16Eq) \ + V(PPC_I8x16Ne) \ + V(PPC_I8x16GtS) \ + V(PPC_I8x16GeS) \ + V(PPC_I8x16GtU) \ + V(PPC_I8x16GeU) \ + V(PPC_I8x16Shl) \ + V(PPC_I8x16ShrS) \ + V(PPC_I8x16ShrU) \ + V(PPC_S128And) \ + V(PPC_S128Or) \ + V(PPC_S128Xor) \ + V(PPC_S128Zero) \ + V(PPC_S128Not) \ + V(PPC_S128Select) \ V(PPC_StoreCompressTagged) \ V(PPC_LoadDecompressTaggedSigned) \ V(PPC_LoadDecompressTaggedPointer) \ diff --git a/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc b/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc index 68d0aaedc4b..b1d124432ef 100644 --- a/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc +++ b/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc @@ -115,18 +115,111 @@ int InstructionScheduler::GetTargetInstructionFlags( case kPPC_CompressAny: case kPPC_F64x2Splat: case kPPC_F64x2ExtractLane: + case kPPC_F64x2ReplaceLane: + case kPPC_F64x2Add: + case kPPC_F64x2Sub: + case kPPC_F64x2Mul: + case kPPC_F64x2Eq: + case kPPC_F64x2Ne: + case kPPC_F64x2Le: + case kPPC_F64x2Lt: case kPPC_F32x4Splat: case kPPC_F32x4ExtractLane: + case kPPC_F32x4ReplaceLane: + case kPPC_F32x4Add: + case kPPC_F32x4AddHoriz: + case kPPC_F32x4Sub: + case kPPC_F32x4Mul: + case kPPC_F32x4Eq: + case kPPC_F32x4Ne: + case kPPC_F32x4Lt: + case kPPC_F32x4Le: case kPPC_I64x2Splat: case kPPC_I64x2ExtractLane: + case kPPC_I64x2ReplaceLane: + case kPPC_I64x2Add: + case kPPC_I64x2Sub: + case kPPC_I64x2Mul: + case kPPC_I64x2MinS: + case kPPC_I64x2MinU: + case kPPC_I64x2MaxS: + case kPPC_I64x2MaxU: + case kPPC_I64x2Eq: + case kPPC_I64x2Ne: + case kPPC_I64x2GtS: + case kPPC_I64x2GtU: + case kPPC_I64x2GeU: + case kPPC_I64x2GeS: + case kPPC_I64x2Shl: + case kPPC_I64x2ShrS: + case kPPC_I64x2ShrU: case kPPC_I32x4Splat: case kPPC_I32x4ExtractLane: + case kPPC_I32x4ReplaceLane: + case kPPC_I32x4Add: + case kPPC_I32x4AddHoriz: + case kPPC_I32x4Sub: + case kPPC_I32x4Mul: + case kPPC_I32x4MinS: + case kPPC_I32x4MinU: + case kPPC_I32x4MaxS: + case kPPC_I32x4MaxU: + case kPPC_I32x4Eq: + case kPPC_I32x4Ne: + case kPPC_I32x4GtS: + 
case kPPC_I32x4GeS: + case kPPC_I32x4GtU: + case kPPC_I32x4GeU: + case kPPC_I32x4Shl: + case kPPC_I32x4ShrS: + case kPPC_I32x4ShrU: case kPPC_I16x8Splat: case kPPC_I16x8ExtractLaneU: case kPPC_I16x8ExtractLaneS: + case kPPC_I16x8ReplaceLane: + case kPPC_I16x8Add: + case kPPC_I16x8AddHoriz: + case kPPC_I16x8Sub: + case kPPC_I16x8Mul: + case kPPC_I16x8MinS: + case kPPC_I16x8MinU: + case kPPC_I16x8MaxS: + case kPPC_I16x8MaxU: + case kPPC_I16x8Eq: + case kPPC_I16x8Ne: + case kPPC_I16x8GtS: + case kPPC_I16x8GeS: + case kPPC_I16x8GtU: + case kPPC_I16x8GeU: + case kPPC_I16x8Shl: + case kPPC_I16x8ShrS: + case kPPC_I16x8ShrU: case kPPC_I8x16Splat: case kPPC_I8x16ExtractLaneU: case kPPC_I8x16ExtractLaneS: + case kPPC_I8x16ReplaceLane: + case kPPC_I8x16Add: + case kPPC_I8x16Sub: + case kPPC_I8x16Mul: + case kPPC_I8x16MinS: + case kPPC_I8x16MinU: + case kPPC_I8x16MaxS: + case kPPC_I8x16MaxU: + case kPPC_I8x16Eq: + case kPPC_I8x16Ne: + case kPPC_I8x16GtS: + case kPPC_I8x16GeS: + case kPPC_I8x16GtU: + case kPPC_I8x16GeU: + case kPPC_I8x16Shl: + case kPPC_I8x16ShrS: + case kPPC_I8x16ShrU: + case kPPC_S128And: + case kPPC_S128Or: + case kPPC_S128Xor: + case kPPC_S128Zero: + case kPPC_S128Not: + case kPPC_S128Select: return kNoOpcodeFlags; case kPPC_LoadWordS8: diff --git a/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc b/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc index 1598fbad041..d5ec475a808 100644 --- a/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc +++ b/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc @@ -2127,6 +2127,86 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(I16x8) \ V(I8x16) +#define SIMD_BINOP_LIST(V) \ + V(F64x2Add) \ + V(F64x2Sub) \ + V(F64x2Mul) \ + V(F64x2Eq) \ + V(F64x2Ne) \ + V(F64x2Le) \ + V(F64x2Lt) \ + V(F32x4Add) \ + V(F32x4AddHoriz) \ + V(F32x4Sub) \ + V(F32x4Mul) \ + V(F32x4Eq) \ + V(F32x4Ne) \ + V(F32x4Lt) \ + V(F32x4Le) \ + V(I64x2Add) \ + V(I64x2Sub) \ + V(I64x2Mul) \ + V(I32x4Add) \ + V(I32x4AddHoriz) \ + V(I32x4Sub) \ + V(I32x4Mul) \ + V(I32x4MinS) \ + V(I32x4MinU) \ + V(I32x4MaxS) \ + V(I32x4MaxU) \ + V(I32x4Eq) \ + V(I32x4Ne) \ + V(I32x4GtS) \ + V(I32x4GeS) \ + V(I32x4GtU) \ + V(I32x4GeU) \ + V(I16x8Add) \ + V(I16x8AddHoriz) \ + V(I16x8Sub) \ + V(I16x8Mul) \ + V(I16x8MinS) \ + V(I16x8MinU) \ + V(I16x8MaxS) \ + V(I16x8MaxU) \ + V(I16x8Eq) \ + V(I16x8Ne) \ + V(I16x8GtS) \ + V(I16x8GeS) \ + V(I16x8GtU) \ + V(I16x8GeU) \ + V(I8x16Add) \ + V(I8x16Sub) \ + V(I8x16Mul) \ + V(I8x16MinS) \ + V(I8x16MinU) \ + V(I8x16MaxS) \ + V(I8x16MaxU) \ + V(I8x16Eq) \ + V(I8x16Ne) \ + V(I8x16GtS) \ + V(I8x16GeS) \ + V(I8x16GtU) \ + V(I8x16GeU) \ + V(S128And) \ + V(S128Or) \ + V(S128Xor) + +#define SIMD_UNOP_LIST(V) V(S128Not) + +#define SIMD_SHIFT_LIST(V) \ + V(I64x2Shl) \ + V(I64x2ShrS) \ + V(I64x2ShrU) \ + V(I32x4Shl) \ + V(I32x4ShrS) \ + V(I32x4ShrU) \ + V(I16x8Shl) \ + V(I16x8ShrS) \ + V(I16x8ShrU) \ + V(I8x16Shl) \ + V(I8x16ShrS) \ + V(I8x16ShrU) + #define SIMD_VISIT_SPLAT(Type) \ void InstructionSelector::Visit##Type##Splat(Node* node) { \ PPCOperandGenerator g(this); \ @@ -2135,7 +2215,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { } SIMD_TYPES(SIMD_VISIT_SPLAT) #undef SIMD_VISIT_SPLAT -#undef SIMD_TYPES #define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \ void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \ @@ -2153,72 +2232,74 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, U) SIMD_VISIT_EXTRACT_LANE(I8x16, S) #undef SIMD_VISIT_EXTRACT_LANE -void 
InstructionSelector::VisitI32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4Add(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4Sub(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4Shl(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4ShrS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4Mul(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4MaxS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4MinS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4Eq(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4Ne(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4MinU(Node* node) { UNIMPLEMENTED(); } +#define SIMD_VISIT_REPLACE_LANE(Type) \ + void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \ + PPCOperandGenerator g(this); \ + int32_t lane = OpParameter<int32_t>(node->op()); \ + Emit(kPPC_##Type##ReplaceLane, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0)), g.UseImmediate(lane), \ + g.UseUniqueRegister(node->InputAt(1))); \ + } +SIMD_TYPES(SIMD_VISIT_REPLACE_LANE) +#undef SIMD_VISIT_REPLACE_LANE + +#define SIMD_VISIT_BINOP(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + PPCOperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempSimd128Register(), \ + g.TempSimd128Register()}; \ + Emit(kPPC_##Opcode, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ + } +SIMD_BINOP_LIST(SIMD_VISIT_BINOP) +#undef SIMD_VISIT_BINOP +#undef SIMD_BINOP_LIST + +#define SIMD_VISIT_UNOP(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + PPCOperandGenerator g(this); \ + Emit(kPPC_##Opcode, g.DefineAsRegister(node), \ + g.UseRegister(node->InputAt(0))); \ + } +SIMD_UNOP_LIST(SIMD_VISIT_UNOP) +#undef SIMD_VISIT_UNOP +#undef SIMD_UNOP_LIST + +#define SIMD_VISIT_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + PPCOperandGenerator g(this); \ + Emit(kPPC_##Opcode, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1))); \ + } +SIMD_SHIFT_LIST(SIMD_VISIT_SHIFT) +#undef SIMD_VISIT_SHIFT +#undef SIMD_SHIFT_LIST +#undef SIMD_TYPES -void InstructionSelector::VisitI32x4MaxU(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitS128Zero(Node* node) { + PPCOperandGenerator g(this); + Emit(kPPC_S128Zero, g.DefineAsRegister(node)); +} -void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitS128Select(Node* node) { + PPCOperandGenerator g(this); + Emit(kPPC_S128Select, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + g.UseRegister(node->InputAt(2))); +} void InstructionSelector::VisitI32x4Neg(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI32x4GtS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4GeS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4GtU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI32x4GeU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8ReplaceLane(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8Shl(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8ShrS(Node* node) { 
UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8ShrU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8Add(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI16x8AddSaturateS(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI16x8Sub(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI16x8SubSaturateS(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI16x8Mul(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8MinS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8MaxS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8Eq(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8Ne(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI16x8AddSaturateU(Node* node) { UNIMPLEMENTED(); } @@ -2227,20 +2308,8 @@ void InstructionSelector::VisitI16x8SubSaturateU(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI16x8MinU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8MaxU(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI16x8Neg(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI16x8GtS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8GeS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8GtU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI16x8GeU(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI16x8RoundingAverageU(Node* node) { UNIMPLEMENTED(); } @@ -2251,32 +2320,14 @@ void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) { void InstructionSelector::VisitI8x16Neg(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI8x16ReplaceLane(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16Add(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI8x16AddSaturateS(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI8x16Sub(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI8x16SubSaturateS(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI8x16MinS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16MaxS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16Eq(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16Ne(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16GtS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16GeS(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI8x16AddSaturateU(Node* node) { UNIMPLEMENTED(); } @@ -2285,36 +2336,8 @@ void InstructionSelector::VisitI8x16SubSaturateU(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI8x16MinU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16MaxU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16GtU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16GeU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitS128And(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitS128Or(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitS128Xor(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitS128AndNot(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS128Zero(Node* node) { 
UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Eq(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Ne(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Lt(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Le(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::EmitPrepareResults( ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor, Node* node) { @@ -2338,12 +2361,6 @@ void InstructionSelector::EmitPrepareResults( } } -void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); } @@ -2352,8 +2369,6 @@ void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); } @@ -2364,10 +2379,6 @@ void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) { void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) { UNIMPLEMENTED(); } @@ -2431,68 +2442,32 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitS1x8AnyTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV32x4AnyTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x8AllTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV32x4AllTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x16AnyTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV16x8AnyTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV16x8AllTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV8x16AnyTrue(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); } +void InstructionSelector::VisitV8x16AllTrue(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); } void 
InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); } -void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); } - -void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); } - void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); } diff --git a/chromium/v8/src/compiler/backend/register-allocator.cc b/chromium/v8/src/compiler/backend/register-allocator.cc index 8b74ef68b14..aab47722044 100644 --- a/chromium/v8/src/compiler/backend/register-allocator.cc +++ b/chromium/v8/src/compiler/backend/register-allocator.cc @@ -391,8 +391,8 @@ LiveRange::LiveRange(int relative_id, MachineRepresentation rep, next_(nullptr), current_interval_(nullptr), last_processed_use_(nullptr), - current_hint_position_(nullptr), - splitting_pointer_(nullptr) { + splitting_pointer_(nullptr), + current_hint_position_(nullptr) { DCHECK(AllocatedOperand::IsSupportedRepresentation(rep)); bits_ = AssignedRegisterField::encode(kUnassignedRegister) | RepresentationField::encode(rep) | @@ -473,11 +473,41 @@ RegisterKind LiveRange::kind() const { return IsFloatingPoint(representation()) ? FP_REGISTERS : GENERAL_REGISTERS; } -UsePosition* LiveRange::FirstHintPosition(int* register_index) const { - for (UsePosition* pos = first_pos_; pos != nullptr; pos = pos->next()) { - if (pos->HintRegister(register_index)) return pos; +UsePosition* LiveRange::FirstHintPosition(int* register_index) { + if (!first_pos_) return nullptr; + if (current_hint_position_) { + if (current_hint_position_->pos() < first_pos_->pos()) { + current_hint_position_ = first_pos_; + } + if (current_hint_position_->pos() > End()) { + current_hint_position_ = nullptr; + } } - return nullptr; + bool needs_revisit = false; + UsePosition* pos = current_hint_position_; + for (; pos != nullptr; pos = pos->next()) { + if (pos->HintRegister(register_index)) { + break; + } + // Phi and use position hints can be assigned during allocation which + // would invalidate the cached hint position. Make sure we revisit them. 
+ needs_revisit = needs_revisit || + pos->hint_type() == UsePositionHintType::kPhi || + pos->hint_type() == UsePositionHintType::kUsePos; + } + if (!needs_revisit) { + current_hint_position_ = pos; + } +#ifdef DEBUG + UsePosition* pos_check = first_pos_; + for (; pos_check != nullptr; pos_check = pos_check->next()) { + if (pos_check->HasHint()) { + break; + } + } + CHECK_EQ(pos, pos_check); +#endif + return pos; } UsePosition* LiveRange::NextUsePosition(LifetimePosition start) const { @@ -684,6 +714,7 @@ UsePosition* LiveRange::DetachAt(LifetimePosition position, LiveRange* result, first_pos_ = nullptr; } result->first_pos_ = use_after; + result->current_hint_position_ = current_hint_position_; // Discard cached iteration state. It might be pointing // to the use that no longer belongs to this live range. @@ -693,6 +724,7 @@ UsePosition* LiveRange::DetachAt(LifetimePosition position, LiveRange* result, if (connect_hints == ConnectHints && use_before != nullptr && use_after != nullptr) { use_after->SetHint(use_before); + result->current_hint_position_ = use_after; } #ifdef DEBUG VerifyChildStructure(); @@ -2660,6 +2692,7 @@ void LiveRangeBuilder::BuildLiveRanges() { pos->set_type(new_type, true); } } + range->ResetCurrentHintPosition(); } for (auto preassigned : data()->preassigned_slot_ranges()) { TopLevelLiveRange* range = preassigned.first; @@ -3493,7 +3526,7 @@ void LinearScanAllocator::ComputeStateFromManyPredecessors( // Choose the live ranges from the majority. const size_t majority = (current_block->PredecessorCount() + 2 - deferred_blocks) / 2; - bool taken_registers[RegisterConfiguration::kMaxRegisters] = {0}; + bool taken_registers[RegisterConfiguration::kMaxRegisters] = {false}; auto assign_to_live = [this, counts, majority]( std::function<bool(TopLevelLiveRange*)> filter, RangeWithRegisterSet* to_be_live, diff --git a/chromium/v8/src/compiler/backend/register-allocator.h b/chromium/v8/src/compiler/backend/register-allocator.h index f890bd868b7..85a9cf12170 100644 --- a/chromium/v8/src/compiler/backend/register-allocator.h +++ b/chromium/v8/src/compiler/backend/register-allocator.h @@ -618,14 +618,14 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { LiveRange* SplitAt(LifetimePosition position, Zone* zone); // Returns nullptr when no register is hinted, otherwise sets register_index. - UsePosition* FirstHintPosition(int* register_index) const; - UsePosition* FirstHintPosition() const { + // Uses {current_hint_position_} as a cache, and tries to update it. + UsePosition* FirstHintPosition(int* register_index); + UsePosition* FirstHintPosition() { int register_index; return FirstHintPosition(®ister_index); } UsePosition* current_hint_position() const { - DCHECK(current_hint_position_ == FirstHintPosition()); return current_hint_position_; } @@ -656,6 +656,7 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { const InstructionOperand& spill_op); void SetUseHints(int register_index); void UnsetUseHints() { SetUseHints(kUnassignedRegister); } + void ResetCurrentHintPosition() { current_hint_position_ = first_pos_; } void Print(const RegisterConfiguration* config, bool with_children) const; void Print(bool with_children) const; @@ -701,10 +702,10 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) { mutable UseInterval* current_interval_; // This is used as a cache, it doesn't affect correctness. mutable UsePosition* last_processed_use_; - // This is used as a cache, it's invalid outside of BuildLiveRanges. 
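For context on the register-allocator change above: LiveRange::FirstHintPosition now starts its scan from the cached current_hint_position_ and only advances the cache when none of the skipped positions could still receive a phi or use-position hint later. A rough standalone model of that caching rule, with hypothetical Demo* types standing in for V8's UsePosition and LiveRange (a sketch of the idea, not the real data structures):

#include <cstdio>

struct DemoUsePos {
  bool has_hint;               // a register hint is already attached
  bool hint_may_appear_later;  // e.g. phi or use-position hints in V8
  DemoUsePos* next;
};

struct DemoRange {
  DemoUsePos* first;
  DemoUsePos* cached;  // plays the role of current_hint_position_

  // Scan for the first hinted position, starting at the cache. The cache is
  // only advanced when none of the skipped positions could still be hinted
  // later; otherwise a future call must revisit them.
  DemoUsePos* FirstHintPosition() {
    if (cached == nullptr) cached = first;
    bool needs_revisit = false;
    DemoUsePos* pos = cached;
    for (; pos != nullptr; pos = pos->next) {
      if (pos->has_hint) break;
      needs_revisit = needs_revisit || pos->hint_may_appear_later;
    }
    if (!needs_revisit) cached = pos;
    return pos;
  }
};

int main() {
  DemoUsePos c{true, false, nullptr};
  DemoUsePos b{false, true, &c};  // might still receive a phi hint
  DemoUsePos a{false, false, &b};
  DemoRange range{&a, nullptr};
  DemoUsePos* hint = range.FirstHintPosition();
  // The hint at |c| is found, but the cache stays put because |b| may change.
  std::printf("found=%d cache_advanced=%d\n", hint == &c, range.cached == &c);
  return 0;
}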
- mutable UsePosition* current_hint_position_; // Cache the last position splintering stopped at. mutable UsePosition* splitting_pointer_; + // This is used as a cache in BuildLiveRanges and during register allocation. + UsePosition* current_hint_position_; LiveRangeBundle* bundle_ = nullptr; // Next interval start, relative to the current linear scan position. LifetimePosition next_start_; diff --git a/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc b/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc index cb79373b425..bef8e7c15aa 100644 --- a/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc +++ b/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc @@ -3853,10 +3853,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } // vector boolean unops - case kS390_S1x2AnyTrue: - case kS390_S1x4AnyTrue: - case kS390_S1x8AnyTrue: - case kS390_S1x16AnyTrue: { + case kS390_V64x2AnyTrue: + case kS390_V32x4AnyTrue: + case kS390_V16x8AnyTrue: + case kS390_V8x16AnyTrue: { Simd128Register src = i.InputSimd128Register(0); Register dst = i.OutputRegister(); Register temp = i.TempRegister(0); @@ -3879,19 +3879,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vtm(kScratchDoubleReg, kScratchDoubleReg, Condition(0), Condition(0), \ Condition(0)); \ __ locgr(Condition(8), dst, temp); - case kS390_S1x2AllTrue: { + case kS390_V64x2AllTrue: { SIMD_ALL_TRUE(3) break; } - case kS390_S1x4AllTrue: { + case kS390_V32x4AllTrue: { SIMD_ALL_TRUE(2) break; } - case kS390_S1x8AllTrue: { + case kS390_V16x8AllTrue: { SIMD_ALL_TRUE(1) break; } - case kS390_S1x16AllTrue: { + case kS390_V8x16AllTrue: { SIMD_ALL_TRUE(0) break; } @@ -4154,10 +4154,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( for (int i = 0, j = 0; i < 2; i++, j = +2) { #ifdef V8_TARGET_BIG_ENDIAN __ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1])); - __ aih(i < 1 ? ip : r0, Operand(k8x16_indices[j])); + __ iihf(i < 1 ? ip : r0, Operand(k8x16_indices[j])); #else __ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j])); - __ aih(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1])); + __ iihf(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1])); #endif } __ vlvgp(kScratchDoubleReg, ip, r0); @@ -4185,6 +4185,119 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( #endif break; } + case kS390_I32x4BitMask: { +#ifdef V8_TARGET_BIG_ENDIAN + __ lgfi(kScratchReg, Operand(0x204060)); + __ iihf(kScratchReg, Operand(0x80808080)); // Zeroing the high bits. 
+#else + __ lgfi(kScratchReg, Operand(0x80808080)); + __ iihf(kScratchReg, Operand(0x60402000)); +#endif + __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3)); + __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg, + Condition(0), Condition(0), Condition(0)); + __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7), + Condition(0)); + break; + } + case kS390_I16x8BitMask: { +#ifdef V8_TARGET_BIG_ENDIAN + __ lgfi(kScratchReg, Operand(0x40506070)); + __ iihf(kScratchReg, Operand(0x102030)); +#else + __ lgfi(kScratchReg, Operand(0x30201000)); + __ iihf(kScratchReg, Operand(0x70605040)); +#endif + __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3)); + __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg, + Condition(0), Condition(0), Condition(0)); + __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7), + Condition(0)); + break; + } + case kS390_I8x16BitMask: { +#ifdef V8_TARGET_BIG_ENDIAN + __ lgfi(r0, Operand(0x60687078)); + __ iihf(r0, Operand(0x40485058)); + __ lgfi(ip, Operand(0x20283038)); + __ iihf(ip, Operand(0x81018)); +#else + __ lgfi(ip, Operand(0x58504840)); + __ iihf(ip, Operand(0x78706860)); + __ lgfi(r0, Operand(0x18100800)); + __ iihf(r0, Operand(0x38302820)); +#endif + __ vlvgp(kScratchDoubleReg, ip, r0); + __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg, + Condition(0), Condition(0), Condition(0)); + __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 3), + Condition(1)); + break; + } + case kS390_F32x4Pmin: { + __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1), Condition(3), Condition(0), + Condition(2)); + break; + } + case kS390_F32x4Pmax: { + __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1), Condition(3), Condition(0), + Condition(2)); + break; + } + case kS390_F64x2Pmin: { + __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1), Condition(3), Condition(0), + Condition(3)); + break; + } + case kS390_F64x2Pmax: { + __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1), Condition(3), Condition(0), + Condition(3)); + break; + } + case kS390_F64x2Ceil: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(6), + Condition(0), Condition(3)); + break; + } + case kS390_F64x2Floor: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(7), + Condition(0), Condition(3)); + break; + } + case kS390_F64x2Trunc: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(5), + Condition(0), Condition(3)); + break; + } + case kS390_F64x2NearestInt: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(4), + Condition(0), Condition(3)); + break; + } + case kS390_F32x4Ceil: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(6), + Condition(0), Condition(2)); + break; + } + case kS390_F32x4Floor: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(7), + Condition(0), Condition(2)); + break; + } + case kS390_F32x4Trunc: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(5), + Condition(0), Condition(2)); + break; + } + case kS390_F32x4NearestInt: { + __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(4), + Condition(0), Condition(2)); + break; + } case kS390_StoreCompressTagged: { CHECK(!instr->HasOutput()); size_t index = 0; diff 
--git a/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h b/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h index 6101b22166c..f588e854265 100644 --- a/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h +++ b/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h @@ -215,6 +215,12 @@ namespace compiler { V(S390_F64x2ExtractLane) \ V(S390_F64x2Qfma) \ V(S390_F64x2Qfms) \ + V(S390_F64x2Pmin) \ + V(S390_F64x2Pmax) \ + V(S390_F64x2Ceil) \ + V(S390_F64x2Floor) \ + V(S390_F64x2Trunc) \ + V(S390_F64x2NearestInt) \ V(S390_F32x4Splat) \ V(S390_F32x4ExtractLane) \ V(S390_F32x4ReplaceLane) \ @@ -238,6 +244,12 @@ namespace compiler { V(S390_F32x4Max) \ V(S390_F32x4Qfma) \ V(S390_F32x4Qfms) \ + V(S390_F32x4Pmin) \ + V(S390_F32x4Pmax) \ + V(S390_F32x4Ceil) \ + V(S390_F32x4Floor) \ + V(S390_F32x4Trunc) \ + V(S390_F32x4NearestInt) \ V(S390_I64x2Neg) \ V(S390_I64x2Add) \ V(S390_I64x2Sub) \ @@ -286,6 +298,7 @@ namespace compiler { V(S390_I32x4UConvertI16x8Low) \ V(S390_I32x4UConvertI16x8High) \ V(S390_I32x4Abs) \ + V(S390_I32x4BitMask) \ V(S390_I16x8Splat) \ V(S390_I16x8ExtractLaneU) \ V(S390_I16x8ExtractLaneS) \ @@ -320,6 +333,7 @@ namespace compiler { V(S390_I16x8SubSaturateU) \ V(S390_I16x8RoundingAverageU) \ V(S390_I16x8Abs) \ + V(S390_I16x8BitMask) \ V(S390_I8x16Splat) \ V(S390_I8x16ExtractLaneU) \ V(S390_I8x16ExtractLaneS) \ @@ -349,16 +363,17 @@ namespace compiler { V(S390_I8x16SubSaturateU) \ V(S390_I8x16RoundingAverageU) \ V(S390_I8x16Abs) \ + V(S390_I8x16BitMask) \ V(S390_S8x16Shuffle) \ V(S390_S8x16Swizzle) \ - V(S390_S1x2AnyTrue) \ - V(S390_S1x4AnyTrue) \ - V(S390_S1x8AnyTrue) \ - V(S390_S1x16AnyTrue) \ - V(S390_S1x2AllTrue) \ - V(S390_S1x4AllTrue) \ - V(S390_S1x8AllTrue) \ - V(S390_S1x16AllTrue) \ + V(S390_V64x2AnyTrue) \ + V(S390_V32x4AnyTrue) \ + V(S390_V16x8AnyTrue) \ + V(S390_V8x16AnyTrue) \ + V(S390_V64x2AllTrue) \ + V(S390_V32x4AllTrue) \ + V(S390_V16x8AllTrue) \ + V(S390_V8x16AllTrue) \ V(S390_S128And) \ V(S390_S128Or) \ V(S390_S128Xor) \ diff --git a/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc b/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc index 502ce229f50..775590a863d 100644 --- a/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc +++ b/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc @@ -161,6 +161,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kS390_F64x2ExtractLane: case kS390_F64x2Qfma: case kS390_F64x2Qfms: + case kS390_F64x2Pmin: + case kS390_F64x2Pmax: + case kS390_F64x2Ceil: + case kS390_F64x2Floor: + case kS390_F64x2Trunc: + case kS390_F64x2NearestInt: case kS390_F32x4Splat: case kS390_F32x4ExtractLane: case kS390_F32x4ReplaceLane: @@ -184,6 +190,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kS390_F32x4Max: case kS390_F32x4Qfma: case kS390_F32x4Qfms: + case kS390_F32x4Pmin: + case kS390_F32x4Pmax: + case kS390_F32x4Ceil: + case kS390_F32x4Floor: + case kS390_F32x4Trunc: + case kS390_F32x4NearestInt: case kS390_I64x2Neg: case kS390_I64x2Add: case kS390_I64x2Sub: @@ -232,6 +244,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kS390_I32x4UConvertI16x8Low: case kS390_I32x4UConvertI16x8High: case kS390_I32x4Abs: + case kS390_I32x4BitMask: case kS390_I16x8Splat: case kS390_I16x8ExtractLaneU: case kS390_I16x8ExtractLaneS: @@ -266,6 +279,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kS390_I16x8SubSaturateU: case kS390_I16x8RoundingAverageU: case kS390_I16x8Abs: + case kS390_I16x8BitMask: 
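The new kS390_*BitMask opcodes listed here implement the wasm bitmask extraction: bit i of the scalar result is the most significant bit of lane i, which the code-generator sequences earlier in this diff gather with vbperm and a constant of bit indices. A scalar sketch of the value being computed (DemoI32x4BitMask is an illustrative name, not a V8 function):

#include <cstdint>
#include <cstdio>

// Scalar model of what i32x4.bitmask computes: bit i of the result is the
// most significant (sign) bit of lane i. The vbperm-based sequence selects
// exactly these bit positions via its constant operand.
uint32_t DemoI32x4BitMask(const int32_t lanes[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    mask |= (static_cast<uint32_t>(lanes[i]) >> 31) << i;
  }
  return mask;
}

int main() {
  int32_t v[4] = {-1, 2, -3, 4};
  std::printf("bitmask = 0x%x\n", DemoI32x4BitMask(v));  // lanes 0 and 2 set -> 0x5
  return 0;
}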
case kS390_I8x16Splat: case kS390_I8x16ExtractLaneU: case kS390_I8x16ExtractLaneS: @@ -295,16 +309,17 @@ int InstructionScheduler::GetTargetInstructionFlags( case kS390_I8x16SubSaturateU: case kS390_I8x16RoundingAverageU: case kS390_I8x16Abs: + case kS390_I8x16BitMask: case kS390_S8x16Shuffle: case kS390_S8x16Swizzle: - case kS390_S1x2AnyTrue: - case kS390_S1x4AnyTrue: - case kS390_S1x8AnyTrue: - case kS390_S1x16AnyTrue: - case kS390_S1x2AllTrue: - case kS390_S1x4AllTrue: - case kS390_S1x8AllTrue: - case kS390_S1x16AllTrue: + case kS390_V64x2AnyTrue: + case kS390_V32x4AnyTrue: + case kS390_V16x8AnyTrue: + case kS390_V8x16AnyTrue: + case kS390_V64x2AllTrue: + case kS390_V32x4AllTrue: + case kS390_V16x8AllTrue: + case kS390_V8x16AllTrue: case kS390_S128And: case kS390_S128Or: case kS390_S128Xor: diff --git a/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc b/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc index 515e8dd127b..39089f346ed 100644 --- a/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc +++ b/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc @@ -2635,11 +2635,19 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) { V(F64x2Abs) \ V(F64x2Neg) \ V(F64x2Sqrt) \ + V(F64x2Ceil) \ + V(F64x2Floor) \ + V(F64x2Trunc) \ + V(F64x2NearestInt) \ V(F32x4Abs) \ V(F32x4Neg) \ V(F32x4RecipApprox) \ V(F32x4RecipSqrtApprox) \ V(F32x4Sqrt) \ + V(F32x4Ceil) \ + V(F32x4Floor) \ + V(F32x4Trunc) \ + V(F32x4NearestInt) \ V(I64x2Neg) \ V(I16x8Abs) \ V(I32x4Neg) \ @@ -2672,14 +2680,14 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) { V(I8x16ShrU) #define SIMD_BOOL_LIST(V) \ - V(S1x2AnyTrue) \ - V(S1x4AnyTrue) \ - V(S1x8AnyTrue) \ - V(S1x16AnyTrue) \ - V(S1x2AllTrue) \ - V(S1x4AllTrue) \ - V(S1x8AllTrue) \ - V(S1x16AllTrue) + V(V64x2AnyTrue) \ + V(V32x4AnyTrue) \ + V(V16x8AnyTrue) \ + V(V8x16AnyTrue) \ + V(V64x2AllTrue) \ + V(V32x4AllTrue) \ + V(V16x8AllTrue) \ + V(V8x16AllTrue) #define SIMD_CONVERSION_LIST(V) \ V(I32x4SConvertF32x4) \ @@ -2794,6 +2802,29 @@ SIMD_VISIT_QFMOP(F64x2Qfms) SIMD_VISIT_QFMOP(F32x4Qfma) SIMD_VISIT_QFMOP(F32x4Qfms) #undef SIMD_VISIT_QFMOP + +#define SIMD_VISIT_BITMASK(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + S390OperandGenerator g(this); \ + Emit(kS390_##Opcode, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0))); \ + } +SIMD_VISIT_BITMASK(I8x16BitMask) +SIMD_VISIT_BITMASK(I16x8BitMask) +SIMD_VISIT_BITMASK(I32x4BitMask) +#undef SIMD_VISIT_BITMASK + +#define SIMD_VISIT_PMIN_MAX(Type) \ + void InstructionSelector::Visit##Type(Node* node) { \ + S390OperandGenerator g(this); \ + Emit(kS390_##Type, g.DefineAsRegister(node), \ + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \ + } +SIMD_VISIT_PMIN_MAX(F64x2Pmin) +SIMD_VISIT_PMIN_MAX(F32x4Pmin) +SIMD_VISIT_PMIN_MAX(F64x2Pmax) +SIMD_VISIT_PMIN_MAX(F32x4Pmax) +#undef SIMD_VISIT_PMIN_MAX #undef SIMD_TYPES void InstructionSelector::VisitS8x16Shuffle(Node* node) { diff --git a/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc b/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc index 4f99ad49ba8..110a478c543 100644 --- a/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -194,6 +194,94 @@ class OutOfLineLoadFloat64NaN final : public OutOfLineCode { XMMRegister const result_; }; +class OutOfLineF32x4Min final : public OutOfLineCode { + public: + OutOfLineF32x4Min(CodeGenerator* gen, 
XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // propagate -0's and NaNs (possibly non-canonical) from the error. + __ Orps(error_, result_); + // Canonicalize NaNs by quieting and clearing the payload. + __ Cmpps(result_, error_, int8_t{3}); + __ Orps(error_, result_); + __ Psrld(result_, byte{10}); + __ Andnps(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + +class OutOfLineF64x2Min final : public OutOfLineCode { + public: + OutOfLineF64x2Min(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // propagate -0's and NaNs (possibly non-canonical) from the error. + __ Orpd(error_, result_); + // Canonicalize NaNs by quieting and clearing the payload. + __ Cmppd(result_, error_, int8_t{3}); + __ Orpd(error_, result_); + __ Psrlq(result_, 13); + __ Andnpd(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + +class OutOfLineF32x4Max final : public OutOfLineCode { + public: + OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // Propagate NaNs (possibly non-canonical). + __ Orps(result_, error_); + // Propagate sign errors and (subtle) quiet NaNs. + __ Subps(result_, error_); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ Cmpps(error_, result_, int8_t{3}); + __ Psrld(error_, byte{10}); + __ Andnps(error_, result_); + __ Movaps(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + +class OutOfLineF64x2Max final : public OutOfLineCode { + public: + OutOfLineF64x2Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // Propagate NaNs (possibly non-canonical). + __ Orpd(result_, error_); + // Propagate sign errors and (subtle) quiet NaNs. + __ Subpd(result_, error_); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ Cmppd(error_, result_, int8_t{3}); + __ Psrlq(error_, byte{13}); + __ Andnpd(error_, result_); + __ Movapd(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + class OutOfLineTruncateDoubleToI final : public OutOfLineCode { public: OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result, @@ -2328,18 +2416,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - // The minpd instruction doesn't propagate NaNs and +0's in its first - // operand. Perform minpd in both orders, merge the resuls, and adjust. + // The minpd instruction doesn't propagate NaNs and -0's in its first + // operand. Perform minpd in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movapd(kScratchDoubleReg, src1); __ Minpd(kScratchDoubleReg, dst); __ Minpd(dst, src1); - // propagate -0's and NaNs, which may be non-canonical. 
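The reworked kX64F64x2Min/Max (and the F32x4 variants below) keep the double minpd/maxpd fast path but now use ptest to detect a discrepancy between the two operand orders and only then jump to the out-of-line fixup, i.e. when a NaN or a signed zero is involved. A scalar model of the lane semantics that fixup has to restore, assuming the usual wasm rules that NaN propagates and -0 orders below +0 (DemoF64Min is an illustrative helper, not V8 code):

#include <cmath>
#include <cstdio>
#include <limits>

// One f64 lane of wasm-style min: NaN in either operand yields NaN, and
// min(+0, -0) is -0, neither of which plain minpd guarantees when the
// special value sits in the first operand.
double DemoF64Min(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  if (a == 0.0 && b == 0.0) {
    return std::signbit(a) ? a : b;  // pick the operand that is -0, if any
  }
  return a < b ? a : b;
}

int main() {
  std::printf("signbit(min(+0, -0)) = %d\n",
              std::signbit(DemoF64Min(0.0, -0.0)));
  std::printf("isnan(min(1, NaN))   = %d\n",
              std::isnan(DemoF64Min(1.0, std::nan(""))));
  return 0;
}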
- __ Orpd(kScratchDoubleReg, dst); - // Canonicalize NaNs by quieting and clearing the payload. - __ Cmppd(dst, kScratchDoubleReg, int8_t{3}); - __ Orpd(kScratchDoubleReg, dst); - __ Psrlq(dst, 13); - __ Andnpd(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorpd(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF64x2Min(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F64x2Max: { @@ -2347,20 +2435,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); // The maxpd instruction doesn't propagate NaNs and +0's in its first - // operand. Perform maxpd in both orders, merge the resuls, and adjust. + // operand. Perform maxpd in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movapd(kScratchDoubleReg, src1); __ Maxpd(kScratchDoubleReg, dst); __ Maxpd(dst, src1); - // Find discrepancies. - __ Xorpd(dst, kScratchDoubleReg); - // Propagate NaNs, which may be non-canonical. - __ Orpd(kScratchDoubleReg, dst); - // Propagate sign discrepancy and (subtle) quiet NaNs. - __ Subpd(kScratchDoubleReg, dst); - // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. - __ Cmppd(dst, kScratchDoubleReg, int8_t{3}); - __ Psrlq(dst, 13); - __ Andnpd(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorpd(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF64x2Max(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F64x2Eq: { @@ -2524,18 +2609,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - // The minps instruction doesn't propagate NaNs and +0's in its first - // operand. Perform minps in both orders, merge the resuls, and adjust. + // The minps instruction doesn't propagate NaNs and -0's in its first + // operand. Perform minps in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movaps(kScratchDoubleReg, src1); __ Minps(kScratchDoubleReg, dst); __ Minps(dst, src1); - // propagate -0's and NaNs, which may be non-canonical. - __ Orps(kScratchDoubleReg, dst); - // Canonicalize NaNs by quieting and clearing the payload. - __ Cmpps(dst, kScratchDoubleReg, int8_t{3}); - __ Orps(kScratchDoubleReg, dst); - __ Psrld(dst, byte{10}); - __ Andnps(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorps(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF32x4Min(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F32x4Max: { @@ -2543,20 +2628,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); // The maxps instruction doesn't propagate NaNs and +0's in its first - // operand. Perform maxps in both orders, merge the resuls, and adjust. + // operand. Perform maxps in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. 
__ Movaps(kScratchDoubleReg, src1); __ Maxps(kScratchDoubleReg, dst); __ Maxps(dst, src1); - // Find discrepancies. - __ Xorps(dst, kScratchDoubleReg); - // Propagate NaNs, which may be non-canonical. - __ Orps(kScratchDoubleReg, dst); - // Propagate sign discrepancy and (subtle) quiet NaNs. - __ Subps(kScratchDoubleReg, dst); - // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. - __ Cmpps(dst, kScratchDoubleReg, int8_t{3}); - __ Psrld(dst, byte{10}); - __ Andnps(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorps(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF32x4Max(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F32x4Eq: { @@ -2619,6 +2701,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Maxps(dst, i.InputSimd128Register(1)); break; } + case kX64F32x4Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode); + break; + } + case kX64F64x2Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode); + break; + } case kX64F64x2Pmin: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); @@ -3093,6 +3187,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0)); break; } + case kX64I32x4DotI16x8S: { + __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } case kX64S128Zero: { XMMRegister dst = i.OutputSimd128Register(); __ Xorps(dst, dst); @@ -3926,10 +4024,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Por(dst, kScratchDoubleReg); break; } - case kX64S1x2AnyTrue: - case kX64S1x4AnyTrue: - case kX64S1x8AnyTrue: - case kX64S1x16AnyTrue: { + case kX64V64x2AnyTrue: + case kX64V32x4AnyTrue: + case kX64V16x8AnyTrue: + case kX64V8x16AnyTrue: { Register dst = i.OutputRegister(); XMMRegister src = i.InputSimd128Register(0); @@ -3942,19 +4040,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1 // respectively. 
- case kX64S1x2AllTrue: { + case kX64V64x2AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq); break; } - case kX64S1x4AllTrue: { + case kX64V32x4AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd); break; } - case kX64S1x8AllTrue: { + case kX64V16x8AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw); break; } - case kX64S1x16AllTrue: { + case kX64V8x16AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb); break; } diff --git a/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h b/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h index 745f5c6cb25..ed7d2060f59 100644 --- a/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h +++ b/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h @@ -174,6 +174,7 @@ namespace compiler { V(X64F64x2Qfms) \ V(X64F64x2Pmin) \ V(X64F64x2Pmax) \ + V(X64F64x2Round) \ V(X64F32x4Splat) \ V(X64F32x4ExtractLane) \ V(X64F32x4ReplaceLane) \ @@ -199,6 +200,7 @@ namespace compiler { V(X64F32x4Qfms) \ V(X64F32x4Pmin) \ V(X64F32x4Pmax) \ + V(X64F32x4Round) \ V(X64I64x2Splat) \ V(X64I64x2ExtractLane) \ V(X64I64x2ReplaceLane) \ @@ -248,6 +250,7 @@ namespace compiler { V(X64I32x4GeU) \ V(X64I32x4Abs) \ V(X64I32x4BitMask) \ + V(X64I32x4DotI16x8S) \ V(X64I16x8Splat) \ V(X64I16x8ExtractLaneU) \ V(X64I16x8ExtractLaneS) \ @@ -357,14 +360,14 @@ namespace compiler { V(X64S8x8Reverse) \ V(X64S8x4Reverse) \ V(X64S8x2Reverse) \ - V(X64S1x2AnyTrue) \ - V(X64S1x2AllTrue) \ - V(X64S1x4AnyTrue) \ - V(X64S1x4AllTrue) \ - V(X64S1x8AnyTrue) \ - V(X64S1x8AllTrue) \ - V(X64S1x16AnyTrue) \ - V(X64S1x16AllTrue) \ + V(X64V64x2AnyTrue) \ + V(X64V64x2AllTrue) \ + V(X64V32x4AnyTrue) \ + V(X64V32x4AllTrue) \ + V(X64V16x8AnyTrue) \ + V(X64V16x8AllTrue) \ + V(X64V8x16AnyTrue) \ + V(X64V8x16AllTrue) \ V(X64Word64AtomicLoadUint8) \ V(X64Word64AtomicLoadUint16) \ V(X64Word64AtomicLoadUint32) \ diff --git a/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc index d2c1d14855c..395c4a4e9c7 100644 --- a/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -146,6 +146,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F64x2Qfms: case kX64F64x2Pmin: case kX64F64x2Pmax: + case kX64F64x2Round: case kX64F32x4Splat: case kX64F32x4ExtractLane: case kX64F32x4ReplaceLane: @@ -171,6 +172,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F32x4Qfms: case kX64F32x4Pmin: case kX64F32x4Pmax: + case kX64F32x4Round: case kX64I64x2Splat: case kX64I64x2ExtractLane: case kX64I64x2ReplaceLane: @@ -220,6 +222,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64I32x4GeU: case kX64I32x4Abs: case kX64I32x4BitMask: + case kX64I32x4DotI16x8S: case kX64I16x8Splat: case kX64I16x8ExtractLaneU: case kX64I16x8ExtractLaneS: @@ -292,12 +295,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64S128Select: case kX64S128Zero: case kX64S128AndNot: - case kX64S1x2AnyTrue: - case kX64S1x2AllTrue: - case kX64S1x4AnyTrue: - case kX64S1x4AllTrue: - case kX64S1x8AnyTrue: - case kX64S1x8AllTrue: + case kX64V64x2AnyTrue: + case kX64V64x2AllTrue: + case kX64V32x4AnyTrue: + case kX64V32x4AllTrue: + case kX64V16x8AnyTrue: + case kX64V16x8AllTrue: case kX64S8x16Swizzle: case kX64S8x16Shuffle: case kX64S32x4Swizzle: @@ -325,8 +328,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64S8x8Reverse: case kX64S8x4Reverse: case kX64S8x2Reverse: - case kX64S1x16AnyTrue: - case kX64S1x16AllTrue: + case kX64V8x16AnyTrue: + 
case kX64V8x16AllTrue: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; diff --git a/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc index dd3f556937d..ab669864954 100644 --- a/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc @@ -1461,7 +1461,16 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \ V(Float32RoundTiesEven, \ kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \ - V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest)) + V(Float64RoundTiesEven, \ + kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \ + V(F32x4Ceil, kX64F32x4Round | MiscField::encode(kRoundUp)) \ + V(F32x4Floor, kX64F32x4Round | MiscField::encode(kRoundDown)) \ + V(F32x4Trunc, kX64F32x4Round | MiscField::encode(kRoundToZero)) \ + V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest)) \ + V(F64x2Ceil, kX64F64x2Round | MiscField::encode(kRoundUp)) \ + V(F64x2Floor, kX64F64x2Round | MiscField::encode(kRoundDown)) \ + V(F64x2Trunc, kX64F64x2Round | MiscField::encode(kRoundToZero)) \ + V(F64x2NearestInt, kX64F64x2Round | MiscField::encode(kRoundToNearest)) #define RO_VISITOR(Name, opcode) \ void InstructionSelector::Visit##Name(Node* node) { \ @@ -1898,16 +1907,33 @@ void VisitWord32EqualImpl(InstructionSelector* selector, Node* node, X64OperandGenerator g(selector); const RootsTable& roots_table = selector->isolate()->roots_table(); RootIndex root_index; - CompressedHeapObjectBinopMatcher m(node); - if (m.right().HasValue() && - roots_table.IsRootHandle(m.right().Value(), &root_index)) { + Node* left = nullptr; + Handle<HeapObject> right; + // HeapConstants and CompressedHeapConstants can be treated the same when + // using them as an input to a 32-bit comparison. Check whether either is + // present. + { + CompressedHeapObjectBinopMatcher m(node); + if (m.right().HasValue()) { + left = m.left().node(); + right = m.right().Value(); + } else { + HeapObjectBinopMatcher m2(node); + if (m2.right().HasValue()) { + left = m2.left().node(); + right = m2.right().Value(); + } + } + } + if (!right.is_null() && roots_table.IsRootHandle(right, &root_index)) { + DCHECK_NE(left, nullptr); InstructionCode opcode = kX64Cmp32 | AddressingModeField::encode(kMode_Root); return VisitCompare( selector, opcode, g.TempImmediate( TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)), - g.UseRegister(m.left().node()), cont); + g.UseRegister(left), cont); } } VisitWordCompare(selector, node, kX64Cmp32, cont); @@ -2674,6 +2700,7 @@ VISIT_ATOMIC_BINOP(Xor) V(I32x4MinU) \ V(I32x4MaxU) \ V(I32x4GeU) \ + V(I32x4DotI16x8S) \ V(I16x8SConvertI32x4) \ V(I16x8Add) \ V(I16x8AddSaturateS) \ @@ -2766,16 +2793,16 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16ShrU) #define SIMD_ANYTRUE_LIST(V) \ - V(S1x2AnyTrue) \ - V(S1x4AnyTrue) \ - V(S1x8AnyTrue) \ - V(S1x16AnyTrue) + V(V64x2AnyTrue) \ + V(V32x4AnyTrue) \ + V(V16x8AnyTrue) \ + V(V8x16AnyTrue) #define SIMD_ALLTRUE_LIST(V) \ - V(S1x2AllTrue) \ - V(S1x4AllTrue) \ - V(S1x8AllTrue) \ - V(S1x16AllTrue) + V(V64x2AllTrue) \ + V(V32x4AllTrue) \ + V(V16x8AllTrue) \ + V(V8x16AllTrue) void InstructionSelector::VisitS128Zero(Node* node) { X64OperandGenerator g(this); |
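A closing note on the S1x2/S1x4/S1x8/S1x16 to V64x2/V32x4/V16x8/V8x16 renames that run through the s390 and x64 files: only the names change, not the semantics. AnyTrue asks whether some lane is non-zero and AllTrue whether every lane is, which is why, as the code-generator comment notes, only AllTrue needs a comparison at the correct lane width before the final ptest. A scalar sketch of the two predicates for the 32x4 case (Demo* names are illustrative):

#include <cstdint>
#include <cstdio>

// v32x4.any_true: some lane is non-zero.
bool DemoV32x4AnyTrue(const uint32_t lanes[4]) {
  for (int i = 0; i < 4; ++i) {
    if (lanes[i] != 0) return true;
  }
  return false;
}

// v32x4.all_true: every lane is non-zero.
bool DemoV32x4AllTrue(const uint32_t lanes[4]) {
  for (int i = 0; i < 4; ++i) {
    if (lanes[i] == 0) return false;
  }
  return true;
}

int main() {
  uint32_t v[4] = {0, 7, 0, 0};
  std::printf("any_true=%d all_true=%d\n", DemoV32x4AnyTrue(v),
              DemoV32x4AllTrue(v));
  return 0;
}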