Diffstat (limited to 'chromium/v8/src/compiler/backend/arm64')
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc        | 123
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h      |  29
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc |  29
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc  |  86
4 files changed, 211 insertions, 56 deletions
diff --git a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc
index 4cf19a5d802..d21440c35b3 100644
--- a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc
+++ b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc
@@ -502,8 +502,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
__ asm_imm(i.OutputSimd128Register().format(), \
i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
} else { \
- VRegister tmp = i.TempSimd128Register(0); \
- Register shift = i.TempRegister(1).gp(); \
+ UseScratchRegisterScope temps(tasm()); \
+ VRegister tmp = temps.AcquireQ(); \
+ Register shift = temps.Acquire##gp(); \
constexpr int mask = (1 << width) - 1; \
__ And(shift, i.InputRegister32(1), mask); \
__ Dup(tmp.format(), shift); \
@@ -521,8 +522,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
__ asm_imm(i.OutputSimd128Register().format(), \
i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
} else { \
- VRegister tmp = i.TempSimd128Register(0); \
- Register shift = i.TempRegister(1).gp(); \
+ UseScratchRegisterScope temps(tasm()); \
+ VRegister tmp = temps.AcquireQ(); \
+ Register shift = temps.Acquire##gp(); \
constexpr int mask = (1 << width) - 1; \
__ And(shift, i.InputRegister32(1), mask); \
__ Dup(tmp.format(), shift); \
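The else-branch of both shift macros now draws its temporaries from the assembler's scratch pool (UseScratchRegisterScope) instead of selector-allocated temps, and the variable shift count is masked to the lane width before being broadcast with Dup. As a hedged scalar model of that path, assuming 32-bit lanes (width = 5); the function name is illustrative only:

    #include <cstdint>
    // Sketch: the shift count is reduced modulo the lane width (the And with
    // mask above), then the same count is applied to every lane (the Dup
    // broadcast followed by the vector shift).
    void shl_i32x4(uint32_t lanes[4], uint32_t shift) {
      shift &= (1u << 5) - 1;  // mask = (1 << width) - 1, i.e. 31 for 32-bit lanes
      for (int i = 0; i < 4; ++i) lanes[i] <<= shift;
    }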
@@ -1901,6 +1903,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
+ case kArm64F64x2Pmin: {
+ VRegister dst = i.OutputSimd128Register().V2D();
+ VRegister lhs = i.InputSimd128Register(0).V2D();
+ VRegister rhs = i.InputSimd128Register(1).V2D();
+ // f64x2.pmin(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs))
+ // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs))
+ __ Fcmgt(dst, lhs, rhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F64x2Pmax: {
+ VRegister dst = i.OutputSimd128Register().V2D();
+ VRegister lhs = i.InputSimd128Register(0).V2D();
+ VRegister rhs = i.InputSimd128Register(1).V2D();
+ // f64x2.pmax(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
+ __ Fcmgt(dst, rhs, lhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F64x2RoundUp:
+ __ Frintp(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
+ case kArm64F64x2RoundDown:
+ __ Frintm(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
+ case kArm64F64x2RoundTruncate:
+ __ Frintz(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
+ case kArm64F64x2RoundTiesEven:
+ __ Frintn(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
case kArm64F32x4Splat: {
__ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
break;
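The comments in the two new cases spell out the pseudo-min/max identity. As a hedged scalar model (not V8 code), each lane resolves NaNs and signed zeros by a plain ordered comparison, keeping lhs whenever the compare is false; that is exactly what Fcmgt followed by Bsl produces, since Bsl picks bits from rhs where the compare mask in dst is set and from lhs elsewhere:

    // Scalar per-lane model of f64x2.pmin / f64x2.pmax (sketch only).
    double pmin(double lhs, double rhs) { return rhs < lhs ? rhs : lhs; }
    double pmax(double lhs, double rhs) { return lhs < rhs ? rhs : lhs; }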
@@ -1953,6 +1992,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
+ case kArm64F32x4Pmin: {
+ VRegister dst = i.OutputSimd128Register().V4S();
+ VRegister lhs = i.InputSimd128Register(0).V4S();
+ VRegister rhs = i.InputSimd128Register(1).V4S();
+ // f32x4.pmin(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+ // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+ __ Fcmgt(dst, lhs, rhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F32x4Pmax: {
+ VRegister dst = i.OutputSimd128Register().V4S();
+ VRegister lhs = i.InputSimd128Register(0).V4S();
+ VRegister rhs = i.InputSimd128Register(1).V4S();
+ // f32x4.pmax(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+ __ Fcmgt(dst, rhs, lhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F32x4RoundUp:
+ __ Frintp(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
+ case kArm64F32x4RoundDown:
+ __ Frintm(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
+ case kArm64F32x4RoundTruncate:
+ __ Frintz(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
+ case kArm64F32x4RoundTiesEven:
+ __ Frintn(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
case kArm64I64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
break;
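The four new rounding cases map Frintp/Frintm/Frintz/Frintn onto the wasm ceil/floor/trunc/nearest operations. A hedged scalar model using the standard-library equivalents, assuming the default FE_TONEAREST rounding mode for the ties-to-even case:

    #include <cmath>
    // Scalar model per lane (sketch): Frintp ~ round up, Frintm ~ round down,
    // Frintz ~ round toward zero, Frintn ~ round to nearest, ties to even.
    float round_up(float x)        { return std::ceil(x); }
    float round_down(float x)      { return std::floor(x); }
    float round_truncate(float x)  { return std::trunc(x); }
    float round_ties_even(float x) { return std::nearbyint(x); }  // with FE_TONEAREST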
@@ -2132,6 +2208,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(dst.W(), tmp.V4S(), 0);
break;
}
+ case kArm64I32x4DotI16x8S: {
+ UseScratchRegisterScope scope(tasm());
+ VRegister lhs = i.InputSimd128Register(0);
+ VRegister rhs = i.InputSimd128Register(1);
+ VRegister tmp1 = scope.AcquireV(kFormat4S);
+ VRegister tmp2 = scope.AcquireV(kFormat4S);
+ __ Smull(tmp1, lhs.V4H(), rhs.V4H());
+ __ Smull2(tmp2, lhs.V8H(), rhs.V8H());
+ __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
+ break;
+ }
case kArm64I16x8Splat: {
__ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
break;
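The new kArm64I32x4DotI16x8S case lowers wasm's i32x4.dot_i16x8_s: Smull/Smull2 form the eight signed 16x16-bit products and Addp sums adjacent pairs into four 32-bit lanes. A scalar sketch of the same semantics (illustrative names, not V8 code):

    #include <cstdint>
    // Each output lane is the sum of two adjacent signed 16x16-bit products.
    void dot_i16x8_s(const int16_t a[8], const int16_t b[8], int32_t out[4]) {
      for (int i = 0; i < 4; ++i) {
        out[i] = int32_t{a[2 * i]} * b[2 * i] + int32_t{a[2 * i + 1]} * b[2 * i + 1];
      }
    }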
@@ -2480,7 +2567,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
- case kArm64S1x2AllTrue: {
+ case kArm64V64x2AllTrue: {
UseScratchRegisterScope scope(tasm());
VRegister temp1 = scope.AcquireV(kFormat2D);
VRegister temp2 = scope.AcquireV(kFormatS);
@@ -2508,32 +2595,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I16x8Load8x8S: {
- __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
break;
}
case kArm64I16x8Load8x8U: {
- __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
break;
}
case kArm64I32x4Load16x4S: {
- __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
break;
}
case kArm64I32x4Load16x4U: {
- __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
break;
}
case kArm64I64x2Load32x2S: {
- __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
case kArm64I64x2Load32x2U: {
- __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
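These load-extend cases switch from ld1 to Ldr (a plain 64-bit load into the low half of the vector register) followed by Sxtl/Uxtl. A hedged scalar sketch of one of them, i16x8.load8x8_s, which loads eight bytes and sign-extends each into a 16-bit lane:

    #include <cstdint>
    #include <cstring>
    // Sketch of i16x8.load8x8_s: an 8-byte load (the Ldr of the V8B half)
    // followed by per-lane sign extension (the Sxtl).
    void load8x8_s(const void* mem, int16_t out[8]) {
      int8_t bytes[8];
      std::memcpy(bytes, mem, sizeof(bytes));
      for (int i = 0; i < 8; ++i) out[i] = bytes[i];
    }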
@@ -2548,13 +2635,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; \
}
// for AnyTrue, the format does not matter, umaxv does not support 2D
- SIMD_REDUCE_OP_CASE(kArm64S1x2AnyTrue, Umaxv, kFormatS, 4S);
- SIMD_REDUCE_OP_CASE(kArm64S1x4AnyTrue, Umaxv, kFormatS, 4S);
- SIMD_REDUCE_OP_CASE(kArm64S1x4AllTrue, Uminv, kFormatS, 4S);
- SIMD_REDUCE_OP_CASE(kArm64S1x8AnyTrue, Umaxv, kFormatH, 8H);
- SIMD_REDUCE_OP_CASE(kArm64S1x8AllTrue, Uminv, kFormatH, 8H);
- SIMD_REDUCE_OP_CASE(kArm64S1x16AnyTrue, Umaxv, kFormatB, 16B);
- SIMD_REDUCE_OP_CASE(kArm64S1x16AllTrue, Uminv, kFormatB, 16B);
+ SIMD_REDUCE_OP_CASE(kArm64V64x2AnyTrue, Umaxv, kFormatS, 4S);
+ SIMD_REDUCE_OP_CASE(kArm64V32x4AnyTrue, Umaxv, kFormatS, 4S);
+ SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S);
+ SIMD_REDUCE_OP_CASE(kArm64V16x8AnyTrue, Umaxv, kFormatH, 8H);
+ SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H);
+ SIMD_REDUCE_OP_CASE(kArm64V8x16AnyTrue, Umaxv, kFormatB, 16B);
+ SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B);
}
return kSuccess;
} // NOLINT(readability/fn_size)
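The renamed reduction cases still lower AnyTrue to Umaxv and AllTrue to Uminv: a vector has some true lane iff the unsigned maximum across lanes is nonzero, and all-true lanes iff the unsigned minimum is nonzero. A scalar sketch for the 32x4 shape (illustrative names only):

    #include <cstdint>
    // Sketch: Umaxv ~ max across lanes (nonzero iff any lane is nonzero),
    // Uminv ~ min across lanes (nonzero iff every lane is nonzero).
    bool any_true(const uint32_t lanes[4]) {
      uint32_t m = 0;
      for (int i = 0; i < 4; ++i) m = lanes[i] > m ? lanes[i] : m;
      return m != 0;
    }
    bool all_true(const uint32_t lanes[4]) {
      uint32_t m = UINT32_MAX;
      for (int i = 0; i < 4; ++i) m = lanes[i] < m ? lanes[i] : m;
      return m != 0;
    }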
diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
index a8e2b52c028..41f9d78550e 100644
--- a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
+++ b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
@@ -186,6 +186,12 @@ namespace compiler {
V(Arm64F64x2Le) \
V(Arm64F64x2Qfma) \
V(Arm64F64x2Qfms) \
+ V(Arm64F64x2Pmin) \
+ V(Arm64F64x2Pmax) \
+ V(Arm64F64x2RoundUp) \
+ V(Arm64F64x2RoundDown) \
+ V(Arm64F64x2RoundTruncate) \
+ V(Arm64F64x2RoundTiesEven) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
@@ -209,6 +215,12 @@ namespace compiler {
V(Arm64F32x4Le) \
V(Arm64F32x4Qfma) \
V(Arm64F32x4Qfms) \
+ V(Arm64F32x4Pmin) \
+ V(Arm64F32x4Pmax) \
+ V(Arm64F32x4RoundUp) \
+ V(Arm64F32x4RoundDown) \
+ V(Arm64F32x4RoundTruncate) \
+ V(Arm64F32x4RoundTiesEven) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
@@ -256,6 +268,7 @@ namespace compiler {
V(Arm64I32x4GeU) \
V(Arm64I32x4Abs) \
V(Arm64I32x4BitMask) \
+ V(Arm64I32x4DotI16x8S) \
V(Arm64I16x8Splat) \
V(Arm64I16x8ExtractLaneU) \
V(Arm64I16x8ExtractLaneS) \
@@ -361,14 +374,14 @@ namespace compiler {
V(Arm64S8x8Reverse) \
V(Arm64S8x4Reverse) \
V(Arm64S8x2Reverse) \
- V(Arm64S1x2AnyTrue) \
- V(Arm64S1x2AllTrue) \
- V(Arm64S1x4AnyTrue) \
- V(Arm64S1x4AllTrue) \
- V(Arm64S1x8AnyTrue) \
- V(Arm64S1x8AllTrue) \
- V(Arm64S1x16AnyTrue) \
- V(Arm64S1x16AllTrue) \
+ V(Arm64V64x2AnyTrue) \
+ V(Arm64V64x2AllTrue) \
+ V(Arm64V32x4AnyTrue) \
+ V(Arm64V32x4AllTrue) \
+ V(Arm64V16x8AnyTrue) \
+ V(Arm64V16x8AllTrue) \
+ V(Arm64V8x16AnyTrue) \
+ V(Arm64V8x16AllTrue) \
V(Arm64S8x16LoadSplat) \
V(Arm64S16x8LoadSplat) \
V(Arm64S32x4LoadSplat) \
diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
index 128ebdac957..3ea84730801 100644
--- a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
+++ b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
@@ -156,6 +156,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2Le:
case kArm64F64x2Qfma:
case kArm64F64x2Qfms:
+ case kArm64F64x2Pmin:
+ case kArm64F64x2Pmax:
+ case kArm64F64x2RoundUp:
+ case kArm64F64x2RoundDown:
+ case kArm64F64x2RoundTruncate:
+ case kArm64F64x2RoundTiesEven:
case kArm64F32x4Splat:
case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane:
@@ -179,6 +185,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4Le:
case kArm64F32x4Qfma:
case kArm64F32x4Qfms:
+ case kArm64F32x4Pmin:
+ case kArm64F32x4Pmax:
+ case kArm64F32x4RoundUp:
+ case kArm64F32x4RoundDown:
+ case kArm64F32x4RoundTruncate:
+ case kArm64F32x4RoundTiesEven:
case kArm64I64x2Splat:
case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane:
@@ -226,6 +238,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4GeU:
case kArm64I32x4Abs:
case kArm64I32x4BitMask:
+ case kArm64I32x4DotI16x8S:
case kArm64I16x8Splat:
case kArm64I16x8ExtractLaneU:
case kArm64I16x8ExtractLaneS:
@@ -331,14 +344,14 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64S8x8Reverse:
case kArm64S8x4Reverse:
case kArm64S8x2Reverse:
- case kArm64S1x2AnyTrue:
- case kArm64S1x2AllTrue:
- case kArm64S1x4AnyTrue:
- case kArm64S1x4AllTrue:
- case kArm64S1x8AnyTrue:
- case kArm64S1x8AllTrue:
- case kArm64S1x16AnyTrue:
- case kArm64S1x16AllTrue:
+ case kArm64V64x2AnyTrue:
+ case kArm64V64x2AllTrue:
+ case kArm64V32x4AnyTrue:
+ case kArm64V32x4AllTrue:
+ case kArm64V16x8AnyTrue:
+ case kArm64V16x8AllTrue:
+ case kArm64V8x16AnyTrue:
+ case kArm64V8x16AllTrue:
case kArm64TestAndBranch32:
case kArm64TestAndBranch:
case kArm64CompareAndBranch32:
diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
index 06a87a8aab7..2e0d977c3c7 100644
--- a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
+++ b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -163,13 +163,9 @@ void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseImmediate(node->InputAt(1)));
}
} else {
- InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
- // We only need a unique register for the first input (src), since in
- // the codegen we use tmp to store the shifts, and then later use it with
- // src. The second input can be the same as the second temp (shift).
selector->Emit(opcode, g.DefineAsRegister(node),
- g.UseUniqueRegister(node->InputAt(0)),
- g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
+ g.UseRegister(node->InputAt(0)),
+ g.UseRegister(node->InputAt(1)));
}
}
@@ -608,18 +604,23 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
InstructionCode opcode = kArchNop;
+ bool require_add = false;
switch (params.transformation) {
case LoadTransformation::kS8x16LoadSplat:
opcode = kArm64S8x16LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kS16x8LoadSplat:
opcode = kArm64S16x8LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kS32x4LoadSplat:
opcode = kArm64S32x4LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kS64x2LoadSplat:
opcode = kArm64S64x2LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kI16x8Load8x8S:
opcode = kArm64I16x8Load8x8S;
@@ -655,13 +656,17 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
inputs[1] = g.UseRegister(index);
outputs[0] = g.DefineAsRegister(node);
- // ld1r uses post-index, so construct address first.
- // TODO(v8:9886) If index can be immediate, use vldr without this add.
- InstructionOperand addr = g.TempRegister();
- Emit(kArm64Add, 1, &addr, 2, inputs);
- inputs[0] = addr;
- inputs[1] = g.TempImmediate(0);
- opcode |= AddressingModeField::encode(kMode_MRI);
+ if (require_add) {
+ // ld1r uses post-index, so construct address first.
+ // TODO(v8:9886) If index can be immediate, use vldr without this add.
+ InstructionOperand addr = g.TempRegister();
+ Emit(kArm64Add, 1, &addr, 2, inputs);
+ inputs[0] = addr;
+ inputs[1] = g.TempImmediate(0);
+ opcode |= AddressingModeField::encode(kMode_MRI);
+ } else {
+ opcode |= AddressingModeField::encode(kMode_MRR);
+ }
Emit(opcode, 1, outputs, 2, inputs);
}
@@ -1360,7 +1365,15 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \
V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \
V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
- V(Float64SilenceNaN, kArm64Float64SilenceNaN)
+ V(Float64SilenceNaN, kArm64Float64SilenceNaN) \
+ V(F32x4Ceil, kArm64F32x4RoundUp) \
+ V(F32x4Floor, kArm64F32x4RoundDown) \
+ V(F32x4Trunc, kArm64F32x4RoundTruncate) \
+ V(F32x4NearestInt, kArm64F32x4RoundTiesEven) \
+ V(F64x2Ceil, kArm64F64x2RoundUp) \
+ V(F64x2Floor, kArm64F64x2RoundDown) \
+ V(F64x2Trunc, kArm64F64x2RoundTruncate) \
+ V(F64x2NearestInt, kArm64F64x2RoundTiesEven)
#define RRR_OP_LIST(V) \
V(Int32Div, kArm64Idiv32) \
@@ -3184,14 +3197,14 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
V(S128Not, kArm64S128Not) \
- V(S1x2AnyTrue, kArm64S1x2AnyTrue) \
- V(S1x2AllTrue, kArm64S1x2AllTrue) \
- V(S1x4AnyTrue, kArm64S1x4AnyTrue) \
- V(S1x4AllTrue, kArm64S1x4AllTrue) \
- V(S1x8AnyTrue, kArm64S1x8AnyTrue) \
- V(S1x8AllTrue, kArm64S1x8AllTrue) \
- V(S1x16AnyTrue, kArm64S1x16AnyTrue) \
- V(S1x16AllTrue, kArm64S1x16AllTrue)
+ V(V64x2AnyTrue, kArm64V64x2AnyTrue) \
+ V(V64x2AllTrue, kArm64V64x2AllTrue) \
+ V(V32x4AnyTrue, kArm64V32x4AnyTrue) \
+ V(V32x4AllTrue, kArm64V32x4AllTrue) \
+ V(V16x8AnyTrue, kArm64V16x8AnyTrue) \
+ V(V16x8AllTrue, kArm64V16x8AllTrue) \
+ V(V8x16AnyTrue, kArm64V8x16AnyTrue) \
+ V(V8x16AllTrue, kArm64V8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
V(I64x2Shl, 64) \
@@ -3249,6 +3262,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4MaxU, kArm64I32x4MaxU) \
V(I32x4GtU, kArm64I32x4GtU) \
V(I32x4GeU, kArm64I32x4GeU) \
+ V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
V(I16x8AddSaturateS, kArm64I16x8AddSaturateS) \
V(I16x8AddHoriz, kArm64I16x8AddHoriz) \
@@ -3613,6 +3627,34 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
VisitRR(this, kArm64Sxtw, node);
}
+namespace {
+void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ Arm64OperandGenerator g(selector);
+ // Need all unique registers because we first compare the two inputs, then we
+ // need the inputs to remain unchanged for the bitselect later.
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)));
+}
+} // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+ VisitPminOrPmax(this, kArm64F32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+ VisitPminOrPmax(this, kArm64F32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+ VisitPminOrPmax(this, kArm64F64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+ VisitPminOrPmax(this, kArm64F64x2Pmax, node);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {