author    Allan Sandfeld Jensen <allan.jensen@qt.io>  2020-10-12 14:27:29 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io>  2020-10-13 09:35:20 +0000
commit    c30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
tree      e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/v8/src/compiler/backend/arm
parent    7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
download  qtwebengine-chromium-85-based.tar.gz

BASELINE: Update Chromium to 85.0.4183.140

Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/v8/src/compiler/backend/arm')
-rw-r--r--  chromium/v8/src/compiler/backend/arm/code-generator-arm.cc      | 150
-rw-r--r--  chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h    |  16
-rw-r--r--  chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc |  16
-rw-r--r--  chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc  |  53
4 files changed, 158 insertions, 77 deletions
diff --git a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
index d453cf0188d..f50c0c858a7 100644
--- a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
@@ -1456,7 +1456,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kArmVrintmF32: {
CpuFeatureScope scope(tasm(), ARMv8);
- __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ if (instr->InputAt(0)->IsSimd128Register()) {
+ __ vrintm(NeonS32, i.OutputSimd128Register(),
+ i.InputSimd128Register(0));
+ } else {
+ __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ }
break;
}
case kArmVrintmF64: {
@@ -1466,7 +1471,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintpF32: {
CpuFeatureScope scope(tasm(), ARMv8);
- __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ if (instr->InputAt(0)->IsSimd128Register()) {
+ __ vrintp(NeonS32, i.OutputSimd128Register(),
+ i.InputSimd128Register(0));
+ } else {
+ __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ }
break;
}
case kArmVrintpF64: {
@@ -1476,7 +1486,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintzF32: {
CpuFeatureScope scope(tasm(), ARMv8);
- __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ if (instr->InputAt(0)->IsSimd128Register()) {
+ __ vrintz(NeonS32, i.OutputSimd128Register(),
+ i.InputSimd128Register(0));
+ } else {
+ __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ }
break;
}
case kArmVrintzF64: {
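These kArmVrint*F32 cases now serve both the scalar Float32Round* operations and the new F32x4Ceil/Floor/Trunc SIMD operations (wired up in the instruction selector further down), so the generator dispatches on whether the operand is a Simd128 register. A rough lane-wise reference of what the packed form computes, as an illustrative sketch (the names here are not V8 code):

#include <array>
#include <cmath>

// Illustrative lane-wise reference for f32x4.floor; ceil and trunc follow
// the same pattern with std::ceil / std::trunc. NaN lanes stay NaN.
std::array<float, 4> f32x4_floor(std::array<float, 4> v) {
  for (float& lane : v) lane = std::floor(lane);
  return v;
}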
@@ -1960,43 +1975,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArmF64x2Lt: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
- __ mov(scratch, Operand(-1), LeaveCC, lt);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, cs);
+ __ mov(scratch, Operand(-1), LeaveCC, mi);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
- __ mov(scratch, Operand(-1), LeaveCC, lt);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, cs);
+ __ mov(scratch, Operand(-1), LeaveCC, mi);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Le: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
- __ mov(scratch, Operand(-1), LeaveCC, le);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, hi);
+ __ mov(scratch, Operand(-1), LeaveCC, ls);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
- __ mov(scratch, Operand(-1), LeaveCC, le);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, hi);
+ __ mov(scratch, Operand(-1), LeaveCC, ls);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
+ case kArmF64x2Pmin: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_EQ(dst, lhs);
+
+ // Move rhs only when rhs is strictly less (mi).
+ __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+ __ vmov(dst.low(), rhs.low(), mi);
+ __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+ __ vmov(dst.high(), rhs.high(), mi);
+ break;
+ }
+ case kArmF64x2Pmax: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_EQ(dst, lhs);
+
+ // Move rhs only when rhs is strictly greater (gt).
+ __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+ __ vmov(dst.low(), rhs.low(), gt);
+ __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+ __ vmov(dst.high(), rhs.high(), gt);
+ break;
+ }
case kArmI64x2SplatI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
__ vdup(Neon32, dst, i.InputRegister(0));
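The reworked f64x2.lt/le sequences fold the NaN handling into the condition codes: after VFPCompareAndSetFlags, mi holds only for a strict ordered less-than while cs covers equal, greater, and unordered, so the separate vs check is no longer needed (likewise ls/hi for le). The lane semantics being produced, as an illustrative sketch:

#include <cstdint>

// Illustrative reference: each output lane is all-ones when the compare
// holds and all-zeros otherwise; a NaN in either operand makes it false.
int64_t f64x2_lt_lane(double a, double b) { return (a < b) ? -1 : 0; }
int64_t f64x2_le_lane(double a, double b) { return (a <= b) ? -1 : 0; }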
@@ -2068,7 +2101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI64x2Neg: {
Simd128Register dst = i.OutputSimd128Register();
- __ vmov(dst, static_cast<uint64_t>(0));
+ __ vmov(dst, uint64_t{0});
__ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0));
break;
}
@@ -2220,6 +2253,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0));
break;
}
+ case kArmF32x4Pmin: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_NE(dst, lhs);
+ DCHECK_NE(dst, rhs);
+
+ // f32x4.pmin(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+ // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+ __ vcgt(dst, lhs, rhs);
+ __ vbsl(dst, rhs, lhs);
+ break;
+ }
+ case kArmF32x4Pmax: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_NE(dst, lhs);
+ DCHECK_NE(dst, rhs);
+
+ // f32x4.pmax(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+ __ vcgt(dst, rhs, lhs);
+ __ vbsl(dst, rhs, lhs);
+ break;
+ }
case kArmI32x4Splat: {
__ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
break;
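Both the F64x2 cases above and these F32x4 cases implement Wasm's pseudo-minimum and pseudo-maximum, which are defined through a plain compare rather than IEEE minNum/maxNum: the comparison is false for NaN, so the first operand is returned on ties and whenever either input is NaN. A scalar sketch of the lane operation (illustrative names, not V8 code):

// pmin(a, b) = b < a ? b : a;  pmax(a, b) = a < b ? b : a.
float f32_pmin_lane(float a, float b) { return (b < a) ? b : a; }
float f32_pmax_lane(float a, float b) { return (a < b) ? b : a; }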
@@ -2361,8 +2421,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vshr(NeonS32, tmp2, src, 31);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001));
- __ vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004));
+ __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001}));
+ __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004}));
__ vand(tmp2, mask, tmp2);
__ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero);
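The mask constants place the value 1 << i in lane i, so ANDing them with the arithmetically shifted sign mask and pairwise-adding produces i32x4.bitmask: bit i of the scalar result is the sign bit of lane i (the i16x8 and i8x16 variants below use the same scheme with wider constants). A scalar reference, as an illustrative sketch:

#include <cstdint>

// Illustrative reference for i32x4.bitmask.
uint32_t i32x4_bitmask(const int32_t lanes[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    mask |= (static_cast<uint32_t>(lanes[i]) >> 31) << i;  // sign bit of lane i
  }
  return mask;
}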
@@ -2538,8 +2598,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vshr(NeonS16, tmp2, src, 15);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001));
- __ vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010));
+ __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001}));
+ __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010}));
__ vand(tmp2, mask, tmp2);
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
@@ -2692,8 +2752,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vshr(NeonS8, tmp2, src, 7);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201));
- __ vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201));
+ __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
+ __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
__ vand(tmp2, mask, tmp2);
__ vext(mask, tmp2, tmp2, 8);
__ vzip(Neon8, mask, tmp2);
@@ -3028,7 +3088,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
- case kArmS1x4AnyTrue: {
+ case kArmV32x4AnyTrue:
+ case kArmV16x8AnyTrue:
+ case kArmV8x16AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
@@ -3039,7 +3101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
- case kArmS1x4AllTrue: {
+ case kArmV32x4AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
@@ -3050,19 +3112,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
- case kArmS1x8AnyTrue: {
- const QwNeonRegister& src = i.InputSimd128Register(0);
- UseScratchRegisterScope temps(tasm());
- DwVfpRegister scratch = temps.AcquireD();
- __ vpmax(NeonU16, scratch, src.low(), src.high());
- __ vpmax(NeonU16, scratch, scratch, scratch);
- __ vpmax(NeonU16, scratch, scratch, scratch);
- __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
- __ cmp(i.OutputRegister(), Operand(0));
- __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
- break;
- }
- case kArmS1x8AllTrue: {
+ case kArmV16x8AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
@@ -3074,23 +3124,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
- case kArmS1x16AnyTrue: {
- const QwNeonRegister& src = i.InputSimd128Register(0);
- UseScratchRegisterScope temps(tasm());
- QwNeonRegister q_scratch = temps.AcquireQ();
- DwVfpRegister d_scratch = q_scratch.low();
- __ vpmax(NeonU8, d_scratch, src.low(), src.high());
- __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
- // vtst to detect any bits in the bottom 32 bits of d_scratch.
- // This saves an instruction vs. the naive sequence of vpmax.
- // kDoubleRegZero is not changed, since it is 0.
- __ vtst(Neon32, q_scratch, q_scratch, q_scratch);
- __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
- __ cmp(i.OutputRegister(), Operand(0));
- __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
- break;
- }
- case kArmS1x16AllTrue: {
+ case kArmV8x16AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
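Alongside the rename of the S1x* opcodes to V32x4/V16x8/V8x16, the three AnyTrue cases collapse into one body, since asking whether any bit in the vector is non-zero does not depend on the lane width; the AllTrue cases remain width-specific. The operations being implemented, as an illustrative scalar sketch:

#include <cstdint>

// Illustrative reference for v8x16.any_true / v8x16.all_true.
bool v8x16_any_true(const uint8_t lanes[16]) {
  for (int i = 0; i < 16; ++i) if (lanes[i] != 0) return true;
  return false;
}
bool v8x16_all_true(const uint8_t lanes[16]) {
  for (int i = 0; i < 16; ++i) if (lanes[i] == 0) return false;
  return true;
}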
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
index c6365bf7a50..39ed658fc4b 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
@@ -144,6 +144,8 @@ namespace compiler {
V(ArmF64x2Ne) \
V(ArmF64x2Lt) \
V(ArmF64x2Le) \
+ V(ArmF64x2Pmin) \
+ V(ArmF64x2Pmax) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \
@@ -165,6 +167,8 @@ namespace compiler {
V(ArmF32x4Ne) \
V(ArmF32x4Lt) \
V(ArmF32x4Le) \
+ V(ArmF32x4Pmin) \
+ V(ArmF32x4Pmax) \
V(ArmI64x2SplatI32Pair) \
V(ArmI64x2ReplaceLaneI32Pair) \
V(ArmI64x2Neg) \
@@ -304,12 +308,12 @@ namespace compiler {
V(ArmS8x8Reverse) \
V(ArmS8x4Reverse) \
V(ArmS8x2Reverse) \
- V(ArmS1x4AnyTrue) \
- V(ArmS1x4AllTrue) \
- V(ArmS1x8AnyTrue) \
- V(ArmS1x8AllTrue) \
- V(ArmS1x16AnyTrue) \
- V(ArmS1x16AllTrue) \
+ V(ArmV32x4AnyTrue) \
+ V(ArmV32x4AllTrue) \
+ V(ArmV16x8AnyTrue) \
+ V(ArmV16x8AllTrue) \
+ V(ArmV8x16AnyTrue) \
+ V(ArmV8x16AllTrue) \
V(ArmS8x16LoadSplat) \
V(ArmS16x8LoadSplat) \
V(ArmS32x4LoadSplat) \
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
index 8c09acd6df8..196aa1ce6c0 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -124,6 +124,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF64x2Ne:
case kArmF64x2Lt:
case kArmF64x2Le:
+ case kArmF64x2Pmin:
+ case kArmF64x2Pmax:
case kArmF32x4Splat:
case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane:
@@ -145,6 +147,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4Ne:
case kArmF32x4Lt:
case kArmF32x4Le:
+ case kArmF32x4Pmin:
+ case kArmF32x4Pmax:
case kArmI64x2SplatI32Pair:
case kArmI64x2ReplaceLaneI32Pair:
case kArmI64x2Neg:
@@ -284,12 +288,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS8x8Reverse:
case kArmS8x4Reverse:
case kArmS8x2Reverse:
- case kArmS1x4AnyTrue:
- case kArmS1x4AllTrue:
- case kArmS1x8AnyTrue:
- case kArmS1x8AllTrue:
- case kArmS1x16AnyTrue:
- case kArmS1x16AllTrue:
+ case kArmV32x4AnyTrue:
+ case kArmV32x4AllTrue:
+ case kArmV16x8AnyTrue:
+ case kArmV16x8AllTrue:
+ case kArmV8x16AnyTrue:
+ case kArmV8x16AllTrue:
return kNoOpcodeFlags;
case kArmVldrF32:
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
index 74658697b50..de0e7c4162c 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -1495,7 +1495,10 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
V(Float64RoundTruncate, kArmVrintzF64) \
V(Float64RoundTiesAway, kArmVrintaF64) \
V(Float32RoundTiesEven, kArmVrintnF32) \
- V(Float64RoundTiesEven, kArmVrintnF64)
+ V(Float64RoundTiesEven, kArmVrintnF64) \
+ V(F32x4Ceil, kArmVrintpF32) \
+ V(F32x4Floor, kArmVrintmF32) \
+ V(F32x4Trunc, kArmVrintzF32)
#define RRR_OP_LIST(V) \
V(Int32MulHigh, kArmSmmul) \
@@ -2525,12 +2528,12 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16Neg, kArmI8x16Neg) \
V(I8x16Abs, kArmI8x16Abs) \
V(S128Not, kArmS128Not) \
- V(S1x4AnyTrue, kArmS1x4AnyTrue) \
- V(S1x4AllTrue, kArmS1x4AllTrue) \
- V(S1x8AnyTrue, kArmS1x8AnyTrue) \
- V(S1x8AllTrue, kArmS1x8AllTrue) \
- V(S1x16AnyTrue, kArmS1x16AnyTrue) \
- V(S1x16AllTrue, kArmS1x16AllTrue)
+ V(V32x4AnyTrue, kArmV32x4AnyTrue) \
+ V(V32x4AllTrue, kArmV32x4AllTrue) \
+ V(V16x8AnyTrue, kArmV16x8AnyTrue) \
+ V(V16x8AllTrue, kArmV16x8AllTrue) \
+ V(V8x16AnyTrue, kArmV8x16AnyTrue) \
+ V(V8x16AllTrue, kArmV8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
V(I64x2Shl, 64) \
@@ -2941,6 +2944,42 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) {
VisitBitMask<kArmI32x4BitMask>(this, node);
}
+namespace {
+void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ ArmOperandGenerator g(selector);
+ // Need all unique registers because we first compare the two inputs, then we
+ // need the inputs to remain unchanged for the bitselect later.
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)));
+}
+
+void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ ArmOperandGenerator g(selector);
+ selector->Emit(opcode, g.DefineSameAsFirst(node),
+ g.UseRegister(node->InputAt(0)),
+ g.UseRegister(node->InputAt(1)));
+}
+} // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+ VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+ VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+ VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+ VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {