author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
tree | e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/v8/src/compiler/backend/arm | |
parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
download | qtwebengine-chromium-85-based.tar.gz |
BASELINE: Update Chromium to 85.0.4183.140
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/v8/src/compiler/backend/arm')
4 files changed, 158 insertions, 77 deletions
diff --git a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
index d453cf0188d..f50c0c858a7 100644
--- a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
@@ -1456,7 +1456,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     case kArmVrintmF32: {
       CpuFeatureScope scope(tasm(), ARMv8);
-      __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintm(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
       break;
     }
     case kArmVrintmF64: {
@@ -1466,7 +1471,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmVrintpF32: {
       CpuFeatureScope scope(tasm(), ARMv8);
-      __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintp(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
       break;
     }
     case kArmVrintpF64: {
@@ -1476,7 +1486,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmVrintzF32: {
       CpuFeatureScope scope(tasm(), ARMv8);
-      __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintz(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
       break;
     }
     case kArmVrintzF64: {
@@ -1960,43 +1975,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kArmF64x2Lt: {
       UseScratchRegisterScope temps(tasm());
       Register scratch = temps.Acquire();
-      __ mov(scratch, Operand(0));
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                                i.InputSimd128Register(1).low());
-      __ mov(scratch, Operand(-1), LeaveCC, lt);
-      // Check for NaN.
-      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ mov(scratch, Operand(0), LeaveCC, cs);
+      __ mov(scratch, Operand(-1), LeaveCC, mi);
       __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
-      __ mov(scratch, Operand(0));
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                                i.InputSimd128Register(1).high());
-      __ mov(scratch, Operand(-1), LeaveCC, lt);
-      // Check for NaN.
-      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ mov(scratch, Operand(0), LeaveCC, cs);
+      __ mov(scratch, Operand(-1), LeaveCC, mi);
       __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
       break;
     }
     case kArmF64x2Le: {
       UseScratchRegisterScope temps(tasm());
       Register scratch = temps.Acquire();
-      __ mov(scratch, Operand(0));
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
                                i.InputSimd128Register(1).low());
-      __ mov(scratch, Operand(-1), LeaveCC, le);
-      // Check for NaN.
-      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ mov(scratch, Operand(0), LeaveCC, hi);
+      __ mov(scratch, Operand(-1), LeaveCC, ls);
       __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
-      __ mov(scratch, Operand(0));
       __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
                                i.InputSimd128Register(1).high());
-      __ mov(scratch, Operand(-1), LeaveCC, le);
-      // Check for NaN.
-      __ mov(scratch, Operand(0), LeaveCC, vs);
+      __ mov(scratch, Operand(0), LeaveCC, hi);
+      __ mov(scratch, Operand(-1), LeaveCC, ls);
       __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
       break;
     }
+    case kArmF64x2Pmin: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_EQ(dst, lhs);
+
+      // Move rhs only when rhs is strictly less than lhs (mi).
+      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+      __ vmov(dst.low(), rhs.low(), mi);
+      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+      __ vmov(dst.high(), rhs.high(), mi);
+      break;
+    }
+    case kArmF64x2Pmax: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_EQ(dst, lhs);
+
+      // Move rhs only when rhs is strictly greater than lhs (gt).
+      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+      __ vmov(dst.low(), rhs.low(), gt);
+      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+      __ vmov(dst.high(), rhs.high(), gt);
+      break;
+    }
     case kArmI64x2SplatI32Pair: {
       Simd128Register dst = i.OutputSimd128Register();
       __ vdup(Neon32, dst, i.InputRegister(0));
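The new kArmF64x2Pmin/Pmax cases implement Wasm's pseudo-minimum and pseudo-maximum, which are defined by a compare-and-select rather than IEEE min/max. As a minimal sketch (not part of the patch; the function names and the lane loop are illustrative assumptions), the per-lane semantics that the conditional vmov sequences above reproduce look like this:

// Reference model (not in the patch): Wasm f64x2.pmin / f64x2.pmax per lane.
// pmin(a, b) = b < a ? b : a;  pmax(a, b) = a < b ? b : a.
#include <array>

std::array<double, 2> F64x2Pmin(std::array<double, 2> lhs,
                                std::array<double, 2> rhs) {
  std::array<double, 2> dst = lhs;  // dst aliases lhs, matching DCHECK_EQ(dst, lhs)
  for (int lane = 0; lane < 2; ++lane) {
    if (rhs[lane] < lhs[lane]) dst[lane] = rhs[lane];  // vmov ..., mi
  }
  return dst;
}

std::array<double, 2> F64x2Pmax(std::array<double, 2> lhs,
                                std::array<double, 2> rhs) {
  std::array<double, 2> dst = lhs;
  for (int lane = 0; lane < 2; ++lane) {
    if (rhs[lane] > lhs[lane]) dst[lane] = rhs[lane];  // vmov ..., gt
  }
  return dst;
}

Because the ordered compare is false for NaN operands and for equal values (including -0.0 versus +0.0), the destination keeps the first operand in those cases, so no separate NaN check is needed here.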
@@ -2068,7 +2101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     }
     case kArmI64x2Neg: {
       Simd128Register dst = i.OutputSimd128Register();
-      __ vmov(dst, static_cast<uint64_t>(0));
+      __ vmov(dst, uint64_t{0});
       __ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0));
       break;
     }
@@ -2220,6 +2253,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
               i.InputSimd128Register(0));
       break;
     }
+    case kArmF32x4Pmin: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_NE(dst, lhs);
+      DCHECK_NE(dst, rhs);
+
+      // f32x4.pmin(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+      // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+      __ vcgt(dst, lhs, rhs);
+      __ vbsl(dst, rhs, lhs);
+      break;
+    }
+    case kArmF32x4Pmax: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_NE(dst, lhs);
+      DCHECK_NE(dst, rhs);
+
+      // f32x4.pmax(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+      __ vcgt(dst, rhs, lhs);
+      __ vbsl(dst, rhs, lhs);
+      break;
+    }
     case kArmI32x4Splat: {
       __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
       break;
@@ -2361,8 +2421,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vshr(NeonS32, tmp2, src, 31);
       // Set i-th bit of each lane i. When AND with tmp, the lanes that
       // are signed will have i-th bit set, unsigned will be 0.
-      __ vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001));
-      __ vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004));
+      __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001}));
+      __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004}));
       __ vand(tmp2, mask, tmp2);
       __ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high());
       __ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero);
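The bitmask hunk above (what appears to be the i32x4.bitmask lowering) relies on the packed per-lane constants 1, 2, 4, 8 loaded into the mask register. A scalar model of that movemask idea, illustrative only and not taken from the patch:

// Reference model (not in the patch): i32x4.bitmask as emitted above.
// vshr #31 turns each lane into all-ones (negative) or all-zeros, the
// constants {1, 2, 4, 8} keep only "bit i for lane i", and the pairwise
// adds (vpadd) fold the four lanes into one scalar.
#include <cstdint>

uint32_t I32x4BitMask(const int32_t lanes[4]) {
  const uint32_t kLaneBit[4] = {1, 2, 4, 8};  // same bits as 0x...0002'0000'0001 / 0x...0008'0000'0004
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t sign = lanes[i] < 0 ? 0xFFFFFFFFu : 0u;  // vshr NeonS32, #31
    mask += sign & kLaneBit[i];                       // vand, then vpadd accumulation
  }
  return mask;  // one bit per lane, set iff that lane's sign bit was set
}

The 16-bit and 8-bit variants in the following hunks follow the same pattern with wider constant tables.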
@@ -2538,8 +2598,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vshr(NeonS16, tmp2, src, 15);
       // Set i-th bit of each lane i. When AND with tmp, the lanes that
       // are signed will have i-th bit set, unsigned will be 0.
-      __ vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001));
-      __ vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010));
+      __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001}));
+      __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010}));
       __ vand(tmp2, mask, tmp2);
       __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
       __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
@@ -2692,8 +2752,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vshr(NeonS8, tmp2, src, 7);
       // Set i-th bit of each lane i. When AND with tmp, the lanes that
       // are signed will have i-th bit set, unsigned will be 0.
-      __ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201));
-      __ vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201));
+      __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
+      __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
       __ vand(tmp2, mask, tmp2);
       __ vext(mask, tmp2, tmp2, 8);
       __ vzip(Neon8, mask, tmp2);
@@ -3028,7 +3088,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
       break;
     }
-    case kArmS1x4AnyTrue: {
+    case kArmV32x4AnyTrue:
+    case kArmV16x8AnyTrue:
+    case kArmV8x16AnyTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
       UseScratchRegisterScope temps(tasm());
       DwVfpRegister scratch = temps.AcquireD();
@@ -3039,7 +3101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
       break;
     }
-    case kArmS1x4AllTrue: {
+    case kArmV32x4AllTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
       UseScratchRegisterScope temps(tasm());
       DwVfpRegister scratch = temps.AcquireD();
@@ -3050,19 +3112,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
       break;
     }
-    case kArmS1x8AnyTrue: {
-      const QwNeonRegister& src = i.InputSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
-      DwVfpRegister scratch = temps.AcquireD();
-      __ vpmax(NeonU16, scratch, src.low(), src.high());
-      __ vpmax(NeonU16, scratch, scratch, scratch);
-      __ vpmax(NeonU16, scratch, scratch, scratch);
-      __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
-      __ cmp(i.OutputRegister(), Operand(0));
-      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
-      break;
-    }
-    case kArmS1x8AllTrue: {
+    case kArmV16x8AllTrue: {
       const QwNeonRegister& src = i.InputSimd128Register(0);
       UseScratchRegisterScope temps(tasm());
       DwVfpRegister scratch = temps.AcquireD();
@@ -3074,23 +3124,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
       break;
     }
-    case kArmS1x16AnyTrue: {
-      const QwNeonRegister& src = i.InputSimd128Register(0);
-      UseScratchRegisterScope temps(tasm());
-      QwNeonRegister q_scratch = temps.AcquireQ();
-      DwVfpRegister d_scratch = q_scratch.low();
-      __ vpmax(NeonU8, d_scratch, src.low(), src.high());
-      __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
-      // vtst to detect any bits in the bottom 32 bits of d_scratch.
-      // This saves an instruction vs. the naive sequence of vpmax.
-      // kDoubleRegZero is not changed, since it is 0.
-      __ vtst(Neon32, q_scratch, q_scratch, q_scratch);
-      __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
-      __ cmp(i.OutputRegister(), Operand(0));
-      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
-      break;
-    }
-    case kArmS1x16AllTrue: {
+    case kArmV8x16AllTrue: {
      const QwNeonRegister& src = i.InputSimd128Register(0);
       UseScratchRegisterScope temps(tasm());
       DwVfpRegister scratch = temps.AcquireD();
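These hunks also collapse the three AnyTrue cases into a single body, which works because any_true only asks whether any bit anywhere in the vector is set and is therefore lane-width agnostic, while all_true has to test every lane at its own width, so the three AllTrue cases stay separate. A scalar sketch of the two predicates (names and layout are illustrative, not from the patch):

// Reference model (not in the patch): why one AnyTrue case can serve
// v32x4/v16x8/v8x16, but AllTrue needs the lane width.
#include <cstdint>
#include <cstring>

bool AnyTrue(const uint8_t bytes[16]) {
  // Lane width does not matter: "some lane non-zero" == "some byte non-zero".
  for (int i = 0; i < 16; ++i)
    if (bytes[i] != 0) return true;
  return false;
}

bool AllTrue32x4(const uint8_t bytes[16]) {
  // Lane width matters: every 32-bit lane must be non-zero as a whole.
  for (int i = 0; i < 4; ++i) {
    uint32_t lane;
    std::memcpy(&lane, bytes + 4 * i, sizeof(lane));
    if (lane == 0) return false;
  }
  return true;
}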
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
index c6365bf7a50..39ed658fc4b 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
@@ -144,6 +144,8 @@ namespace compiler {
   V(ArmF64x2Ne) \
   V(ArmF64x2Lt) \
   V(ArmF64x2Le) \
+  V(ArmF64x2Pmin) \
+  V(ArmF64x2Pmax) \
   V(ArmF32x4Splat) \
   V(ArmF32x4ExtractLane) \
   V(ArmF32x4ReplaceLane) \
@@ -165,6 +167,8 @@ namespace compiler {
   V(ArmF32x4Ne) \
   V(ArmF32x4Lt) \
   V(ArmF32x4Le) \
+  V(ArmF32x4Pmin) \
+  V(ArmF32x4Pmax) \
   V(ArmI64x2SplatI32Pair) \
   V(ArmI64x2ReplaceLaneI32Pair) \
   V(ArmI64x2Neg) \
@@ -304,12 +308,12 @@ namespace compiler {
   V(ArmS8x8Reverse) \
   V(ArmS8x4Reverse) \
   V(ArmS8x2Reverse) \
-  V(ArmS1x4AnyTrue) \
-  V(ArmS1x4AllTrue) \
-  V(ArmS1x8AnyTrue) \
-  V(ArmS1x8AllTrue) \
-  V(ArmS1x16AnyTrue) \
-  V(ArmS1x16AllTrue) \
+  V(ArmV32x4AnyTrue) \
+  V(ArmV32x4AllTrue) \
+  V(ArmV16x8AnyTrue) \
+  V(ArmV16x8AllTrue) \
+  V(ArmV8x16AnyTrue) \
+  V(ArmV8x16AllTrue) \
   V(ArmS8x16LoadSplat) \
   V(ArmS16x8LoadSplat) \
   V(ArmS32x4LoadSplat) \
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
index 8c09acd6df8..196aa1ce6c0 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -124,6 +124,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmF64x2Ne:
     case kArmF64x2Lt:
     case kArmF64x2Le:
+    case kArmF64x2Pmin:
+    case kArmF64x2Pmax:
     case kArmF32x4Splat:
     case kArmF32x4ExtractLane:
     case kArmF32x4ReplaceLane:
@@ -145,6 +147,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmF32x4Ne:
     case kArmF32x4Lt:
     case kArmF32x4Le:
+    case kArmF32x4Pmin:
+    case kArmF32x4Pmax:
     case kArmI64x2SplatI32Pair:
     case kArmI64x2ReplaceLaneI32Pair:
     case kArmI64x2Neg:
@@ -284,12 +288,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmS8x8Reverse:
     case kArmS8x4Reverse:
     case kArmS8x2Reverse:
-    case kArmS1x4AnyTrue:
-    case kArmS1x4AllTrue:
-    case kArmS1x8AnyTrue:
-    case kArmS1x8AllTrue:
-    case kArmS1x16AnyTrue:
-    case kArmS1x16AllTrue:
+    case kArmV32x4AnyTrue:
+    case kArmV32x4AllTrue:
+    case kArmV16x8AnyTrue:
+    case kArmV16x8AllTrue:
+    case kArmV8x16AnyTrue:
+    case kArmV8x16AllTrue:
       return kNoOpcodeFlags;

     case kArmVldrF32:
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
index 74658697b50..de0e7c4162c 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -1495,7 +1495,10 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
   V(Float64RoundTruncate, kArmVrintzF64) \
   V(Float64RoundTiesAway, kArmVrintaF64) \
   V(Float32RoundTiesEven, kArmVrintnF32) \
-  V(Float64RoundTiesEven, kArmVrintnF64)
+  V(Float64RoundTiesEven, kArmVrintnF64) \
+  V(F32x4Ceil, kArmVrintpF32) \
+  V(F32x4Floor, kArmVrintmF32) \
+  V(F32x4Trunc, kArmVrintzF32)

 #define RRR_OP_LIST(V) \
   V(Int32MulHigh, kArmSmmul) \
@@ -2525,12 +2528,12 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
   V(I8x16Neg, kArmI8x16Neg) \
   V(I8x16Abs, kArmI8x16Abs) \
   V(S128Not, kArmS128Not) \
-  V(S1x4AnyTrue, kArmS1x4AnyTrue) \
-  V(S1x4AllTrue, kArmS1x4AllTrue) \
-  V(S1x8AnyTrue, kArmS1x8AnyTrue) \
-  V(S1x8AllTrue, kArmS1x8AllTrue) \
-  V(S1x16AnyTrue, kArmS1x16AnyTrue) \
-  V(S1x16AllTrue, kArmS1x16AllTrue)
+  V(V32x4AnyTrue, kArmV32x4AnyTrue) \
+  V(V32x4AllTrue, kArmV32x4AllTrue) \
+  V(V16x8AnyTrue, kArmV16x8AnyTrue) \
+  V(V16x8AllTrue, kArmV16x8AllTrue) \
+  V(V8x16AnyTrue, kArmV8x16AnyTrue) \
+  V(V8x16AllTrue, kArmV8x16AllTrue)

 #define SIMD_SHIFT_OP_LIST(V) \
   V(I64x2Shl, 64) \
@@ -2941,6 +2944,42 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) {
   VisitBitMask<kArmI32x4BitMask>(this, node);
 }

+namespace {
+void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+                          Node* node) {
+  ArmOperandGenerator g(selector);
+  // Need all unique registers because we first compare the two inputs, then we
+  // need the inputs to remain unchanged for the bitselect later.
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseUniqueRegister(node->InputAt(0)),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+
+void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode,
+                          Node* node) {
+  ArmOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineSameAsFirst(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+}  // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
+}
+
 // static
 MachineOperatorBuilder::Flags
 InstructionSelector::SupportedMachineOperatorFlags() {
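The f32x4 selector helper asks for all-unique registers because the emitted sequence first overwrites the destination with the compare mask (vcgt) and then still needs both untouched inputs for the bitselect (vbsl), whereas the f64x2 helper can use DefineSameAsFirst since its conditional vmov sequence updates lhs in place. A minimal scalar model of the bitselect identity quoted in the code-generator comments (illustrative only, not part of the patch):

// Reference model (not in the patch): f32x4.pmin via compare + bitselect,
// mirroring the emitted vcgt (mask) + vbsl (select) pair.
#include <cstdint>
#include <cstring>

float F32PminLane(float lhs, float rhs) {
  // vcgt: mask is all-ones iff lhs > rhs (ordered), else all-zeros.
  uint32_t mask = (lhs > rhs) ? 0xFFFFFFFFu : 0u;
  uint32_t l, r;
  std::memcpy(&l, &lhs, sizeof(l));
  std::memcpy(&r, &rhs, sizeof(r));
  // vbsl(dst=mask, rhs, lhs): dst = (mask & rhs) | (~mask & lhs).
  uint32_t d = (mask & r) | (~mask & l);
  float out;
  std::memcpy(&out, &d, sizeof(out));
  return out;  // rhs only when lhs > rhs; NaN and ±0 ties keep lhs.
}

For pmax the mask comes from the opposite ordered compare (rhs > lhs), with the same select.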