Diffstat (limited to 'chromium/v8/src/compiler/backend')
-rw-r--r--  chromium/v8/src/compiler/backend/arm/code-generator-arm.cc | 150
-rw-r--r--  chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h | 16
-rw-r--r--  chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc | 16
-rw-r--r--  chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc | 53
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc | 123
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h | 29
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc | 29
-rw-r--r--  chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc | 86
-rw-r--r--  chromium/v8/src/compiler/backend/code-generator.cc | 47
-rw-r--r--  chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc | 31
-rw-r--r--  chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h | 15
-rw-r--r--  chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc | 15
-rw-r--r--  chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc | 68
-rw-r--r--  chromium/v8/src/compiler/backend/instruction-selector-impl.h | 2
-rw-r--r--  chromium/v8/src/compiler/backend/instruction-selector.cc | 97
-rw-r--r--  chromium/v8/src/compiler/backend/instruction.h | 2
-rw-r--r--  chromium/v8/src/compiler/backend/mips/code-generator-mips.cc | 102
-rw-r--r--  chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h | 19
-rw-r--r--  chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc | 19
-rw-r--r--  chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc | 39
-rw-r--r--  chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc | 102
-rw-r--r--  chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h | 19
-rw-r--r--  chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc | 19
-rw-r--r--  chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc | 39
-rw-r--r--  chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc | 683
-rw-r--r--  chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h | 93
-rw-r--r--  chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc | 93
-rw-r--r--  chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc | 307
-rw-r--r--  chromium/v8/src/compiler/backend/register-allocator.cc | 47
-rw-r--r--  chromium/v8/src/compiler/backend/register-allocator.h | 11
-rw-r--r--  chromium/v8/src/compiler/backend/s390/code-generator-s390.cc | 133
-rw-r--r--  chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h | 31
-rw-r--r--  chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc | 31
-rw-r--r--  chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc | 47
-rw-r--r--  chromium/v8/src/compiler/backend/x64/code-generator-x64.cc | 194
-rw-r--r--  chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h | 19
-rw-r--r--  chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc | 19
-rw-r--r--  chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc | 53
38 files changed, 2331 insertions, 567 deletions
diff --git a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
index d453cf0188d..f50c0c858a7 100644
--- a/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/code-generator-arm.cc
@@ -1456,7 +1456,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
case kArmVrintmF32: {
CpuFeatureScope scope(tasm(), ARMv8);
- __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ if (instr->InputAt(0)->IsSimd128Register()) {
+ __ vrintm(NeonS32, i.OutputSimd128Register(),
+ i.InputSimd128Register(0));
+ } else {
+ __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ }
break;
}
case kArmVrintmF64: {
@@ -1466,7 +1471,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintpF32: {
CpuFeatureScope scope(tasm(), ARMv8);
- __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ if (instr->InputAt(0)->IsSimd128Register()) {
+ __ vrintp(NeonS32, i.OutputSimd128Register(),
+ i.InputSimd128Register(0));
+ } else {
+ __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ }
break;
}
case kArmVrintpF64: {
@@ -1476,7 +1486,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintzF32: {
CpuFeatureScope scope(tasm(), ARMv8);
- __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ if (instr->InputAt(0)->IsSimd128Register()) {
+ __ vrintz(NeonS32, i.OutputSimd128Register(),
+ i.InputSimd128Register(0));
+ } else {
+ __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+ }
break;
}
case kArmVrintzF64: {
@@ -1960,43 +1975,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArmF64x2Lt: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
- __ mov(scratch, Operand(-1), LeaveCC, lt);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, cs);
+ __ mov(scratch, Operand(-1), LeaveCC, mi);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
- __ mov(scratch, Operand(-1), LeaveCC, lt);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, cs);
+ __ mov(scratch, Operand(-1), LeaveCC, mi);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
case kArmF64x2Le: {
UseScratchRegisterScope temps(tasm());
Register scratch = temps.Acquire();
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
i.InputSimd128Register(1).low());
- __ mov(scratch, Operand(-1), LeaveCC, le);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, hi);
+ __ mov(scratch, Operand(-1), LeaveCC, ls);
__ vmov(i.OutputSimd128Register().low(), scratch, scratch);
- __ mov(scratch, Operand(0));
__ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
i.InputSimd128Register(1).high());
- __ mov(scratch, Operand(-1), LeaveCC, le);
- // Check for NaN.
- __ mov(scratch, Operand(0), LeaveCC, vs);
+ __ mov(scratch, Operand(0), LeaveCC, hi);
+ __ mov(scratch, Operand(-1), LeaveCC, ls);
__ vmov(i.OutputSimd128Register().high(), scratch, scratch);
break;
}
+ case kArmF64x2Pmin: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_EQ(dst, lhs);
+
+ // Move rhs only when rhs is strictly lesser (mi).
+ __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+ __ vmov(dst.low(), rhs.low(), mi);
+ __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+ __ vmov(dst.high(), rhs.high(), mi);
+ break;
+ }
+ case kArmF64x2Pmax: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_EQ(dst, lhs);
+
+ // Move rhs only when rhs is strictly greater (gt).
+ __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+ __ vmov(dst.low(), rhs.low(), gt);
+ __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+ __ vmov(dst.high(), rhs.high(), gt);
+ break;
+ }
case kArmI64x2SplatI32Pair: {
Simd128Register dst = i.OutputSimd128Register();
__ vdup(Neon32, dst, i.InputRegister(0));
@@ -2068,7 +2101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmI64x2Neg: {
Simd128Register dst = i.OutputSimd128Register();
- __ vmov(dst, static_cast<uint64_t>(0));
+ __ vmov(dst, uint64_t{0});
__ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0));
break;
}
@@ -2220,6 +2253,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0));
break;
}
+ case kArmF32x4Pmin: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_NE(dst, lhs);
+ DCHECK_NE(dst, rhs);
+
+ // f32x4.pmin(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+ // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+ __ vcgt(dst, lhs, rhs);
+ __ vbsl(dst, rhs, lhs);
+ break;
+ }
+ case kArmF32x4Pmax: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ DCHECK_NE(dst, lhs);
+ DCHECK_NE(dst, rhs);
+
+ // f32x4.pmax(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+ __ vcgt(dst, rhs, lhs);
+ __ vbsl(dst, rhs, lhs);
+ break;
+ }
case kArmI32x4Splat: {
__ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
break;
@@ -2361,8 +2421,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vshr(NeonS32, tmp2, src, 31);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001));
- __ vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004));
+ __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001}));
+ __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004}));
__ vand(tmp2, mask, tmp2);
__ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero);
@@ -2538,8 +2598,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vshr(NeonS16, tmp2, src, 15);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001));
- __ vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010));
+ __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001}));
+ __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010}));
__ vand(tmp2, mask, tmp2);
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
@@ -2692,8 +2752,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vshr(NeonS8, tmp2, src, 7);
// Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0.
- __ vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201));
- __ vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201));
+ __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
+ __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
__ vand(tmp2, mask, tmp2);
__ vext(mask, tmp2, tmp2, 8);
__ vzip(Neon8, mask, tmp2);
@@ -3028,7 +3088,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
- case kArmS1x4AnyTrue: {
+ case kArmV32x4AnyTrue:
+ case kArmV16x8AnyTrue:
+ case kArmV8x16AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
@@ -3039,7 +3101,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
- case kArmS1x4AllTrue: {
+ case kArmV32x4AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
@@ -3050,19 +3112,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
- case kArmS1x8AnyTrue: {
- const QwNeonRegister& src = i.InputSimd128Register(0);
- UseScratchRegisterScope temps(tasm());
- DwVfpRegister scratch = temps.AcquireD();
- __ vpmax(NeonU16, scratch, src.low(), src.high());
- __ vpmax(NeonU16, scratch, scratch, scratch);
- __ vpmax(NeonU16, scratch, scratch, scratch);
- __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
- __ cmp(i.OutputRegister(), Operand(0));
- __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
- break;
- }
- case kArmS1x8AllTrue: {
+ case kArmV16x8AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
@@ -3074,23 +3124,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
- case kArmS1x16AnyTrue: {
- const QwNeonRegister& src = i.InputSimd128Register(0);
- UseScratchRegisterScope temps(tasm());
- QwNeonRegister q_scratch = temps.AcquireQ();
- DwVfpRegister d_scratch = q_scratch.low();
- __ vpmax(NeonU8, d_scratch, src.low(), src.high());
- __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
- // vtst to detect any bits in the bottom 32 bits of d_scratch.
- // This saves an instruction vs. the naive sequence of vpmax.
- // kDoubleRegZero is not changed, since it is 0.
- __ vtst(Neon32, q_scratch, q_scratch, q_scratch);
- __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
- __ cmp(i.OutputRegister(), Operand(0));
- __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
- break;
- }
- case kArmS1x16AllTrue: {
+ case kArmV8x16AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
DwVfpRegister scratch = temps.AcquireD();
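
For reference, a minimal scalar C++ sketch (illustrative only, not part of this patch) of the Wasm pseudo-min/pseudo-max semantics that the vcgt/vbsl and VFPCompareAndSetFlags/vmov sequences above implement. Note how a NaN operand falls through to the "else" branch, matching the all-zero mask produced by an unordered compare:

// pmin(a, b) = b < a ? b : a;  pmax(a, b) = a < b ? b : a.
// Unlike std::fmin/std::fmax, a NaN in either operand keeps lhs.
#include <cmath>
#include <cstdio>

float pmin(float a, float b) { return b < a ? b : a; }
float pmax(float a, float b) { return a < b ? b : a; }

int main() {
  std::printf("%g %g\n", pmin(1.0f, 2.0f), pmax(1.0f, 2.0f));  // 1 2
  std::printf("%g\n", pmin(std::nanf(""), 1.0f));              // nan (lhs kept)
  return 0;
}
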
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
index c6365bf7a50..39ed658fc4b 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
+++ b/chromium/v8/src/compiler/backend/arm/instruction-codes-arm.h
@@ -144,6 +144,8 @@ namespace compiler {
V(ArmF64x2Ne) \
V(ArmF64x2Lt) \
V(ArmF64x2Le) \
+ V(ArmF64x2Pmin) \
+ V(ArmF64x2Pmax) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \
@@ -165,6 +167,8 @@ namespace compiler {
V(ArmF32x4Ne) \
V(ArmF32x4Lt) \
V(ArmF32x4Le) \
+ V(ArmF32x4Pmin) \
+ V(ArmF32x4Pmax) \
V(ArmI64x2SplatI32Pair) \
V(ArmI64x2ReplaceLaneI32Pair) \
V(ArmI64x2Neg) \
@@ -304,12 +308,12 @@ namespace compiler {
V(ArmS8x8Reverse) \
V(ArmS8x4Reverse) \
V(ArmS8x2Reverse) \
- V(ArmS1x4AnyTrue) \
- V(ArmS1x4AllTrue) \
- V(ArmS1x8AnyTrue) \
- V(ArmS1x8AllTrue) \
- V(ArmS1x16AnyTrue) \
- V(ArmS1x16AllTrue) \
+ V(ArmV32x4AnyTrue) \
+ V(ArmV32x4AllTrue) \
+ V(ArmV16x8AnyTrue) \
+ V(ArmV16x8AllTrue) \
+ V(ArmV8x16AnyTrue) \
+ V(ArmV8x16AllTrue) \
V(ArmS8x16LoadSplat) \
V(ArmS16x8LoadSplat) \
V(ArmS32x4LoadSplat) \
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
index 8c09acd6df8..196aa1ce6c0 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -124,6 +124,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF64x2Ne:
case kArmF64x2Lt:
case kArmF64x2Le:
+ case kArmF64x2Pmin:
+ case kArmF64x2Pmax:
case kArmF32x4Splat:
case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane:
@@ -145,6 +147,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF32x4Ne:
case kArmF32x4Lt:
case kArmF32x4Le:
+ case kArmF32x4Pmin:
+ case kArmF32x4Pmax:
case kArmI64x2SplatI32Pair:
case kArmI64x2ReplaceLaneI32Pair:
case kArmI64x2Neg:
@@ -284,12 +288,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS8x8Reverse:
case kArmS8x4Reverse:
case kArmS8x2Reverse:
- case kArmS1x4AnyTrue:
- case kArmS1x4AllTrue:
- case kArmS1x8AnyTrue:
- case kArmS1x8AllTrue:
- case kArmS1x16AnyTrue:
- case kArmS1x16AllTrue:
+ case kArmV32x4AnyTrue:
+ case kArmV32x4AllTrue:
+ case kArmV16x8AnyTrue:
+ case kArmV16x8AllTrue:
+ case kArmV8x16AnyTrue:
+ case kArmV8x16AllTrue:
return kNoOpcodeFlags;
case kArmVldrF32:
diff --git a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
index 74658697b50..de0e7c4162c 100644
--- a/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
+++ b/chromium/v8/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -1495,7 +1495,10 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
V(Float64RoundTruncate, kArmVrintzF64) \
V(Float64RoundTiesAway, kArmVrintaF64) \
V(Float32RoundTiesEven, kArmVrintnF32) \
- V(Float64RoundTiesEven, kArmVrintnF64)
+ V(Float64RoundTiesEven, kArmVrintnF64) \
+ V(F32x4Ceil, kArmVrintpF32) \
+ V(F32x4Floor, kArmVrintmF32) \
+ V(F32x4Trunc, kArmVrintzF32)
#define RRR_OP_LIST(V) \
V(Int32MulHigh, kArmSmmul) \
@@ -2525,12 +2528,12 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16Neg, kArmI8x16Neg) \
V(I8x16Abs, kArmI8x16Abs) \
V(S128Not, kArmS128Not) \
- V(S1x4AnyTrue, kArmS1x4AnyTrue) \
- V(S1x4AllTrue, kArmS1x4AllTrue) \
- V(S1x8AnyTrue, kArmS1x8AnyTrue) \
- V(S1x8AllTrue, kArmS1x8AllTrue) \
- V(S1x16AnyTrue, kArmS1x16AnyTrue) \
- V(S1x16AllTrue, kArmS1x16AllTrue)
+ V(V32x4AnyTrue, kArmV32x4AnyTrue) \
+ V(V32x4AllTrue, kArmV32x4AllTrue) \
+ V(V16x8AnyTrue, kArmV16x8AnyTrue) \
+ V(V16x8AllTrue, kArmV16x8AllTrue) \
+ V(V8x16AnyTrue, kArmV8x16AnyTrue) \
+ V(V8x16AllTrue, kArmV8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
V(I64x2Shl, 64) \
@@ -2941,6 +2944,42 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) {
VisitBitMask<kArmI32x4BitMask>(this, node);
}
+namespace {
+void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ ArmOperandGenerator g(selector);
+ // Need all unique registers because we first compare the two inputs, then we
+ // need the inputs to remain unchanged for the bitselect later.
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)));
+}
+
+void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ ArmOperandGenerator g(selector);
+ selector->Emit(opcode, g.DefineSameAsFirst(node),
+ g.UseRegister(node->InputAt(0)),
+ g.UseRegister(node->InputAt(1)));
+}
+} // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+ VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+ VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+ VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+ VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
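
The kArmI32x4BitMask sequence in code-generator-arm.cc above (vshr by 31, vand with a per-lane bit mask, then vpadd to collapse the lanes) computes the following scalar semantics; this is a reference sketch only, not part of this patch:

#include <cstdint>

// Bit i of the result is the sign bit of lane i.
uint32_t i32x4_bitmask(const int32_t lanes[4]) {
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    result |= (static_cast<uint32_t>(lanes[i]) >> 31) << i;
  }
  return result;
}
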
diff --git a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc
index 4cf19a5d802..d21440c35b3 100644
--- a/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc
+++ b/chromium/v8/src/compiler/backend/arm64/code-generator-arm64.cc
@@ -502,8 +502,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
__ asm_imm(i.OutputSimd128Register().format(), \
i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
} else { \
- VRegister tmp = i.TempSimd128Register(0); \
- Register shift = i.TempRegister(1).gp(); \
+ UseScratchRegisterScope temps(tasm()); \
+ VRegister tmp = temps.AcquireQ(); \
+ Register shift = temps.Acquire##gp(); \
constexpr int mask = (1 << width) - 1; \
__ And(shift, i.InputRegister32(1), mask); \
__ Dup(tmp.format(), shift); \
@@ -521,8 +522,9 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
__ asm_imm(i.OutputSimd128Register().format(), \
i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
} else { \
- VRegister tmp = i.TempSimd128Register(0); \
- Register shift = i.TempRegister(1).gp(); \
+ UseScratchRegisterScope temps(tasm()); \
+ VRegister tmp = temps.AcquireQ(); \
+ Register shift = temps.Acquire##gp(); \
constexpr int mask = (1 << width) - 1; \
__ And(shift, i.InputRegister32(1), mask); \
__ Dup(tmp.format(), shift); \
@@ -1901,6 +1903,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
+ case kArm64F64x2Pmin: {
+ VRegister dst = i.OutputSimd128Register().V2D();
+ VRegister lhs = i.InputSimd128Register(0).V2D();
+ VRegister rhs = i.InputSimd128Register(1).V2D();
+ // f64x2.pmin(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs))
+ // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs))
+ __ Fcmgt(dst, lhs, rhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F64x2Pmax: {
+ VRegister dst = i.OutputSimd128Register().V2D();
+ VRegister lhs = i.InputSimd128Register(0).V2D();
+ VRegister rhs = i.InputSimd128Register(1).V2D();
+ // f64x2.pmax(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
+ __ Fcmgt(dst, rhs, lhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F64x2RoundUp:
+ __ Frintp(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
+ case kArm64F64x2RoundDown:
+ __ Frintm(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
+ case kArm64F64x2RoundTruncate:
+ __ Frintz(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
+ case kArm64F64x2RoundTiesEven:
+ __ Frintn(i.OutputSimd128Register().V2D(),
+ i.InputSimd128Register(0).V2D());
+ break;
case kArm64F32x4Splat: {
__ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
break;
@@ -1953,6 +1992,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
+ case kArm64F32x4Pmin: {
+ VRegister dst = i.OutputSimd128Register().V4S();
+ VRegister lhs = i.InputSimd128Register(0).V4S();
+ VRegister rhs = i.InputSimd128Register(1).V4S();
+ // f32x4.pmin(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+ // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+ __ Fcmgt(dst, lhs, rhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F32x4Pmax: {
+ VRegister dst = i.OutputSimd128Register().V4S();
+ VRegister lhs = i.InputSimd128Register(0).V4S();
+ VRegister rhs = i.InputSimd128Register(1).V4S();
+ // f32x4.pmax(lhs, rhs)
+ // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+ __ Fcmgt(dst, rhs, lhs);
+ __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+ break;
+ }
+ case kArm64F32x4RoundUp:
+ __ Frintp(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
+ case kArm64F32x4RoundDown:
+ __ Frintm(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
+ case kArm64F32x4RoundTruncate:
+ __ Frintz(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
+ case kArm64F32x4RoundTiesEven:
+ __ Frintn(i.OutputSimd128Register().V4S(),
+ i.InputSimd128Register(0).V4S());
+ break;
case kArm64I64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
break;
@@ -2132,6 +2208,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(dst.W(), tmp.V4S(), 0);
break;
}
+ case kArm64I32x4DotI16x8S: {
+ UseScratchRegisterScope scope(tasm());
+ VRegister lhs = i.InputSimd128Register(0);
+ VRegister rhs = i.InputSimd128Register(1);
+ VRegister tmp1 = scope.AcquireV(kFormat4S);
+ VRegister tmp2 = scope.AcquireV(kFormat4S);
+ __ Smull(tmp1, lhs.V4H(), rhs.V4H());
+ __ Smull2(tmp2, lhs.V8H(), rhs.V8H());
+ __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
+ break;
+ }
case kArm64I16x8Splat: {
__ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
break;
@@ -2480,7 +2567,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
- case kArm64S1x2AllTrue: {
+ case kArm64V64x2AllTrue: {
UseScratchRegisterScope scope(tasm());
VRegister temp1 = scope.AcquireV(kFormat2D);
VRegister temp2 = scope.AcquireV(kFormatS);
@@ -2508,32 +2595,32 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArm64I16x8Load8x8S: {
- __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
break;
}
case kArm64I16x8Load8x8U: {
- __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
break;
}
case kArm64I32x4Load16x4S: {
- __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
break;
}
case kArm64I32x4Load16x4U: {
- __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
break;
}
case kArm64I64x2Load32x2S: {
- __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
case kArm64I64x2Load32x2U: {
- __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
+ __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
__ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
break;
}
@@ -2548,13 +2635,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; \
}
// for AnyTrue, the format does not matter, umaxv does not support 2D
- SIMD_REDUCE_OP_CASE(kArm64S1x2AnyTrue, Umaxv, kFormatS, 4S);
- SIMD_REDUCE_OP_CASE(kArm64S1x4AnyTrue, Umaxv, kFormatS, 4S);
- SIMD_REDUCE_OP_CASE(kArm64S1x4AllTrue, Uminv, kFormatS, 4S);
- SIMD_REDUCE_OP_CASE(kArm64S1x8AnyTrue, Umaxv, kFormatH, 8H);
- SIMD_REDUCE_OP_CASE(kArm64S1x8AllTrue, Uminv, kFormatH, 8H);
- SIMD_REDUCE_OP_CASE(kArm64S1x16AnyTrue, Umaxv, kFormatB, 16B);
- SIMD_REDUCE_OP_CASE(kArm64S1x16AllTrue, Uminv, kFormatB, 16B);
+ SIMD_REDUCE_OP_CASE(kArm64V64x2AnyTrue, Umaxv, kFormatS, 4S);
+ SIMD_REDUCE_OP_CASE(kArm64V32x4AnyTrue, Umaxv, kFormatS, 4S);
+ SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S);
+ SIMD_REDUCE_OP_CASE(kArm64V16x8AnyTrue, Umaxv, kFormatH, 8H);
+ SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H);
+ SIMD_REDUCE_OP_CASE(kArm64V8x16AnyTrue, Umaxv, kFormatB, 16B);
+ SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B);
}
return kSuccess;
} // NOLINT(readability/fn_size)
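
A scalar sketch of i32x4.dot_i16x8_s, which the new kArm64I32x4DotI16x8S case above lowers to Smull/Smull2 (widening 16x16->32 signed multiplies of the low and high halves) followed by Addp (pairwise add). The helper name is illustrative only, not part of this patch:

#include <cstdint>

void i32x4_dot_i16x8_s(const int16_t lhs[8], const int16_t rhs[8],
                       int32_t out[4]) {
  for (int i = 0; i < 4; ++i) {
    // Each output lane is the sum of two adjacent signed 16x16->32 products.
    int64_t sum = int64_t{lhs[2 * i]} * rhs[2 * i] +
                  int64_t{lhs[2 * i + 1]} * rhs[2 * i + 1];
    // Only -32768 * -32768 in both pairs can exceed int32 range; it wraps.
    out[i] = static_cast<int32_t>(sum);
  }
}
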
diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
index a8e2b52c028..41f9d78550e 100644
--- a/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
+++ b/chromium/v8/src/compiler/backend/arm64/instruction-codes-arm64.h
@@ -186,6 +186,12 @@ namespace compiler {
V(Arm64F64x2Le) \
V(Arm64F64x2Qfma) \
V(Arm64F64x2Qfms) \
+ V(Arm64F64x2Pmin) \
+ V(Arm64F64x2Pmax) \
+ V(Arm64F64x2RoundUp) \
+ V(Arm64F64x2RoundDown) \
+ V(Arm64F64x2RoundTruncate) \
+ V(Arm64F64x2RoundTiesEven) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
@@ -209,6 +215,12 @@ namespace compiler {
V(Arm64F32x4Le) \
V(Arm64F32x4Qfma) \
V(Arm64F32x4Qfms) \
+ V(Arm64F32x4Pmin) \
+ V(Arm64F32x4Pmax) \
+ V(Arm64F32x4RoundUp) \
+ V(Arm64F32x4RoundDown) \
+ V(Arm64F32x4RoundTruncate) \
+ V(Arm64F32x4RoundTiesEven) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
@@ -256,6 +268,7 @@ namespace compiler {
V(Arm64I32x4GeU) \
V(Arm64I32x4Abs) \
V(Arm64I32x4BitMask) \
+ V(Arm64I32x4DotI16x8S) \
V(Arm64I16x8Splat) \
V(Arm64I16x8ExtractLaneU) \
V(Arm64I16x8ExtractLaneS) \
@@ -361,14 +374,14 @@ namespace compiler {
V(Arm64S8x8Reverse) \
V(Arm64S8x4Reverse) \
V(Arm64S8x2Reverse) \
- V(Arm64S1x2AnyTrue) \
- V(Arm64S1x2AllTrue) \
- V(Arm64S1x4AnyTrue) \
- V(Arm64S1x4AllTrue) \
- V(Arm64S1x8AnyTrue) \
- V(Arm64S1x8AllTrue) \
- V(Arm64S1x16AnyTrue) \
- V(Arm64S1x16AllTrue) \
+ V(Arm64V64x2AnyTrue) \
+ V(Arm64V64x2AllTrue) \
+ V(Arm64V32x4AnyTrue) \
+ V(Arm64V32x4AllTrue) \
+ V(Arm64V16x8AnyTrue) \
+ V(Arm64V16x8AllTrue) \
+ V(Arm64V8x16AnyTrue) \
+ V(Arm64V8x16AllTrue) \
V(Arm64S8x16LoadSplat) \
V(Arm64S16x8LoadSplat) \
V(Arm64S32x4LoadSplat) \
diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
index 128ebdac957..3ea84730801 100644
--- a/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
+++ b/chromium/v8/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
@@ -156,6 +156,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2Le:
case kArm64F64x2Qfma:
case kArm64F64x2Qfms:
+ case kArm64F64x2Pmin:
+ case kArm64F64x2Pmax:
+ case kArm64F64x2RoundUp:
+ case kArm64F64x2RoundDown:
+ case kArm64F64x2RoundTruncate:
+ case kArm64F64x2RoundTiesEven:
case kArm64F32x4Splat:
case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane:
@@ -179,6 +185,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4Le:
case kArm64F32x4Qfma:
case kArm64F32x4Qfms:
+ case kArm64F32x4Pmin:
+ case kArm64F32x4Pmax:
+ case kArm64F32x4RoundUp:
+ case kArm64F32x4RoundDown:
+ case kArm64F32x4RoundTruncate:
+ case kArm64F32x4RoundTiesEven:
case kArm64I64x2Splat:
case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane:
@@ -226,6 +238,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4GeU:
case kArm64I32x4Abs:
case kArm64I32x4BitMask:
+ case kArm64I32x4DotI16x8S:
case kArm64I16x8Splat:
case kArm64I16x8ExtractLaneU:
case kArm64I16x8ExtractLaneS:
@@ -331,14 +344,14 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64S8x8Reverse:
case kArm64S8x4Reverse:
case kArm64S8x2Reverse:
- case kArm64S1x2AnyTrue:
- case kArm64S1x2AllTrue:
- case kArm64S1x4AnyTrue:
- case kArm64S1x4AllTrue:
- case kArm64S1x8AnyTrue:
- case kArm64S1x8AllTrue:
- case kArm64S1x16AnyTrue:
- case kArm64S1x16AllTrue:
+ case kArm64V64x2AnyTrue:
+ case kArm64V64x2AllTrue:
+ case kArm64V32x4AnyTrue:
+ case kArm64V32x4AllTrue:
+ case kArm64V16x8AnyTrue:
+ case kArm64V16x8AllTrue:
+ case kArm64V8x16AnyTrue:
+ case kArm64V8x16AllTrue:
case kArm64TestAndBranch32:
case kArm64TestAndBranch:
case kArm64CompareAndBranch32:
diff --git a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
index 06a87a8aab7..2e0d977c3c7 100644
--- a/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
+++ b/chromium/v8/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -163,13 +163,9 @@ void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseImmediate(node->InputAt(1)));
}
} else {
- InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
- // We only need a unique register for the first input (src), since in
- // the codegen we use tmp to store the shifts, and then later use it with
- // src. The second input can be the same as the second temp (shift).
selector->Emit(opcode, g.DefineAsRegister(node),
- g.UseUniqueRegister(node->InputAt(0)),
- g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
+ g.UseRegister(node->InputAt(0)),
+ g.UseRegister(node->InputAt(1)));
}
}
@@ -608,18 +604,23 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
void InstructionSelector::VisitLoadTransform(Node* node) {
LoadTransformParameters params = LoadTransformParametersOf(node->op());
InstructionCode opcode = kArchNop;
+ bool require_add = false;
switch (params.transformation) {
case LoadTransformation::kS8x16LoadSplat:
opcode = kArm64S8x16LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kS16x8LoadSplat:
opcode = kArm64S16x8LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kS32x4LoadSplat:
opcode = kArm64S32x4LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kS64x2LoadSplat:
opcode = kArm64S64x2LoadSplat;
+ require_add = true;
break;
case LoadTransformation::kI16x8Load8x8S:
opcode = kArm64I16x8Load8x8S;
@@ -655,13 +656,17 @@ void InstructionSelector::VisitLoadTransform(Node* node) {
inputs[1] = g.UseRegister(index);
outputs[0] = g.DefineAsRegister(node);
- // ld1r uses post-index, so construct address first.
- // TODO(v8:9886) If index can be immediate, use vldr without this add.
- InstructionOperand addr = g.TempRegister();
- Emit(kArm64Add, 1, &addr, 2, inputs);
- inputs[0] = addr;
- inputs[1] = g.TempImmediate(0);
- opcode |= AddressingModeField::encode(kMode_MRI);
+ if (require_add) {
+ // ld1r uses post-index, so construct address first.
+ // TODO(v8:9886) If index can be immediate, use vldr without this add.
+ InstructionOperand addr = g.TempRegister();
+ Emit(kArm64Add, 1, &addr, 2, inputs);
+ inputs[0] = addr;
+ inputs[1] = g.TempImmediate(0);
+ opcode |= AddressingModeField::encode(kMode_MRI);
+ } else {
+ opcode |= AddressingModeField::encode(kMode_MRR);
+ }
Emit(opcode, 1, outputs, 2, inputs);
}
@@ -1360,7 +1365,15 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \
V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \
V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
- V(Float64SilenceNaN, kArm64Float64SilenceNaN)
+ V(Float64SilenceNaN, kArm64Float64SilenceNaN) \
+ V(F32x4Ceil, kArm64F32x4RoundUp) \
+ V(F32x4Floor, kArm64F32x4RoundDown) \
+ V(F32x4Trunc, kArm64F32x4RoundTruncate) \
+ V(F32x4NearestInt, kArm64F32x4RoundTiesEven) \
+ V(F64x2Ceil, kArm64F64x2RoundUp) \
+ V(F64x2Floor, kArm64F64x2RoundDown) \
+ V(F64x2Trunc, kArm64F64x2RoundTruncate) \
+ V(F64x2NearestInt, kArm64F64x2RoundTiesEven)
#define RRR_OP_LIST(V) \
V(Int32Div, kArm64Idiv32) \
@@ -3184,14 +3197,14 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
V(S128Not, kArm64S128Not) \
- V(S1x2AnyTrue, kArm64S1x2AnyTrue) \
- V(S1x2AllTrue, kArm64S1x2AllTrue) \
- V(S1x4AnyTrue, kArm64S1x4AnyTrue) \
- V(S1x4AllTrue, kArm64S1x4AllTrue) \
- V(S1x8AnyTrue, kArm64S1x8AnyTrue) \
- V(S1x8AllTrue, kArm64S1x8AllTrue) \
- V(S1x16AnyTrue, kArm64S1x16AnyTrue) \
- V(S1x16AllTrue, kArm64S1x16AllTrue)
+ V(V64x2AnyTrue, kArm64V64x2AnyTrue) \
+ V(V64x2AllTrue, kArm64V64x2AllTrue) \
+ V(V32x4AnyTrue, kArm64V32x4AnyTrue) \
+ V(V32x4AllTrue, kArm64V32x4AllTrue) \
+ V(V16x8AnyTrue, kArm64V16x8AnyTrue) \
+ V(V16x8AllTrue, kArm64V16x8AllTrue) \
+ V(V8x16AnyTrue, kArm64V8x16AnyTrue) \
+ V(V8x16AllTrue, kArm64V8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
V(I64x2Shl, 64) \
@@ -3249,6 +3262,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4MaxU, kArm64I32x4MaxU) \
V(I32x4GtU, kArm64I32x4GtU) \
V(I32x4GeU, kArm64I32x4GeU) \
+ V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
V(I16x8AddSaturateS, kArm64I16x8AddSaturateS) \
V(I16x8AddHoriz, kArm64I16x8AddHoriz) \
@@ -3613,6 +3627,34 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
VisitRR(this, kArm64Sxtw, node);
}
+namespace {
+void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ Arm64OperandGenerator g(selector);
+ // Need all unique registers because we first compare the two inputs, then we
+ // need the inputs to remain unchanged for the bitselect later.
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)));
+}
+} // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+ VisitPminOrPmax(this, kArm64F32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+ VisitPminOrPmax(this, kArm64F32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+ VisitPminOrPmax(this, kArm64F64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+ VisitPminOrPmax(this, kArm64F64x2Pmax, node);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
diff --git a/chromium/v8/src/compiler/backend/code-generator.cc b/chromium/v8/src/compiler/backend/code-generator.cc
index 72c5750035a..83dccf69e82 100644
--- a/chromium/v8/src/compiler/backend/code-generator.cc
+++ b/chromium/v8/src/compiler/backend/code-generator.cc
@@ -55,19 +55,20 @@ CodeGenerator::CodeGenerator(
frame_access_state_(nullptr),
linkage_(linkage),
instructions_(instructions),
- unwinding_info_writer_(zone()),
+ unwinding_info_writer_(codegen_zone),
info_(info),
- labels_(zone()->NewArray<Label>(instructions->InstructionBlockCount())),
+ labels_(
+ codegen_zone->NewArray<Label>(instructions->InstructionBlockCount())),
current_block_(RpoNumber::Invalid()),
start_source_position_(start_source_position),
current_source_position_(SourcePosition::Unknown()),
tasm_(isolate, options, CodeObjectRequired::kNo, std::move(buffer)),
resolver_(this),
- safepoints_(zone()),
- handlers_(zone()),
- deoptimization_exits_(zone()),
- deoptimization_literals_(zone()),
- translations_(zone()),
+ safepoints_(codegen_zone),
+ handlers_(codegen_zone),
+ deoptimization_exits_(codegen_zone),
+ deoptimization_literals_(codegen_zone),
+ translations_(codegen_zone),
max_unoptimized_frame_height_(max_unoptimized_frame_height),
max_pushed_argument_count_(max_pushed_argument_count),
caller_registers_saved_(false),
@@ -77,12 +78,12 @@ CodeGenerator::CodeGenerator(
osr_pc_offset_(-1),
optimized_out_literal_id_(-1),
source_position_table_builder_(
- SourcePositionTableBuilder::RECORD_SOURCE_POSITIONS),
- protected_instructions_(zone()),
+ codegen_zone, SourcePositionTableBuilder::RECORD_SOURCE_POSITIONS),
+ protected_instructions_(codegen_zone),
result_(kSuccess),
poisoning_level_(poisoning_level),
- block_starts_(zone()),
- instr_starts_(zone()) {
+ block_starts_(codegen_zone),
+ instr_starts_(codegen_zone) {
for (int i = 0; i < instructions->InstructionBlockCount(); ++i) {
new (&labels_[i]) Label;
}
@@ -161,7 +162,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleDeoptimizerCall(
DeoptimizeReason deoptimization_reason = exit->reason();
Address deopt_entry =
Deoptimizer::GetDeoptimizationEntry(tasm()->isolate(), deopt_kind);
- if (info()->is_source_positions_enabled()) {
+ if (info()->source_positions()) {
tasm()->RecordDeoptReason(deoptimization_reason, exit->pos(),
deoptimization_id);
}
@@ -191,7 +192,7 @@ void CodeGenerator::AssembleCode() {
// the frame (that is done in AssemblePrologue).
FrameScope frame_scope(tasm(), StackFrame::MANUAL);
- if (info->is_source_positions_enabled()) {
+ if (info->source_positions()) {
AssembleSourcePosition(start_source_position());
}
offsets_info_.code_start_register_check = tasm()->pc_offset();
@@ -242,7 +243,7 @@ void CodeGenerator::AssembleCode() {
unwinding_info_writer_.SetNumberOfInstructionBlocks(
instructions()->InstructionBlockCount());
- if (info->trace_turbo_json_enabled()) {
+ if (info->trace_turbo_json()) {
block_starts_.assign(instructions()->instruction_blocks().size(), -1);
instr_starts_.assign(instructions()->instructions().size(), {});
}
@@ -253,7 +254,7 @@ void CodeGenerator::AssembleCode() {
if (block->ShouldAlign() && !tasm()->jump_optimization_info()) {
tasm()->CodeTargetAlign();
}
- if (info->trace_turbo_json_enabled()) {
+ if (info->trace_turbo_json()) {
block_starts_[block->rpo_number().ToInt()] = tasm()->pc_offset();
}
// Bind a label for a block.
@@ -503,6 +504,7 @@ MaybeHandle<Code> CodeGenerator::FinalizeCode() {
.set_deoptimization_data(deopt_data)
.set_is_turbofanned()
.set_stack_slots(frame()->GetTotalFrameSlotCount())
+ .set_profiler_data(info()->profiler_data())
.TryBuild();
Handle<Code> code;
@@ -721,7 +723,7 @@ RpoNumber CodeGenerator::ComputeBranchInfo(BranchInfo* branch,
CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction(
int instruction_index, const InstructionBlock* block) {
Instruction* instr = instructions()->InstructionAt(instruction_index);
- if (info()->trace_turbo_json_enabled()) {
+ if (info()->trace_turbo_json()) {
instr_starts_[instruction_index].gap_pc_offset = tasm()->pc_offset();
}
int first_unused_stack_slot;
@@ -741,14 +743,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction(
if (instr->IsJump() && block->must_deconstruct_frame()) {
AssembleDeconstructFrame();
}
- if (info()->trace_turbo_json_enabled()) {
+ if (info()->trace_turbo_json()) {
instr_starts_[instruction_index].arch_instr_pc_offset = tasm()->pc_offset();
}
// Assemble architecture-specific code for the instruction.
CodeGenResult result = AssembleArchInstruction(instr);
if (result != kSuccess) return result;
- if (info()->trace_turbo_json_enabled()) {
+ if (info()->trace_turbo_json()) {
instr_starts_[instruction_index].condition_pc_offset = tasm()->pc_offset();
}
@@ -832,7 +834,7 @@ void CodeGenerator::AssembleSourcePosition(SourcePosition source_position) {
buffer << "-- ";
// Turbolizer only needs the source position, as it can reconstruct
// the inlining stack from other information.
- if (info->trace_turbo_json_enabled() || !tasm()->isolate() ||
+ if (info->trace_turbo_json() || !tasm()->isolate() ||
tasm()->isolate()->concurrent_recompilation_enabled()) {
buffer << source_position;
} else {
@@ -979,7 +981,8 @@ void CodeGenerator::RecordCallPosition(Instruction* instr) {
InstructionOperandConverter i(this, instr);
RpoNumber handler_rpo = i.InputRpo(instr->InputCount() - 1);
DCHECK(instructions()->InstructionBlockAt(handler_rpo)->IsHandler());
- handlers_.push_back({GetLabel(handler_rpo), tasm()->pc_offset()});
+ handlers_.push_back(
+ {GetLabel(handler_rpo), tasm()->pc_offset_for_safepoint()});
}
if (needs_frame_state) {
@@ -989,7 +992,7 @@ void CodeGenerator::RecordCallPosition(Instruction* instr) {
size_t frame_state_offset = 2;
FrameStateDescriptor* descriptor =
GetDeoptimizationEntry(instr, frame_state_offset).descriptor();
- int pc_offset = tasm()->pc_offset();
+ int pc_offset = tasm()->pc_offset_for_safepoint();
BuildTranslation(instr, pc_offset, frame_state_offset,
descriptor->state_combine());
}
@@ -1329,7 +1332,7 @@ void CodeGenerator::InitializeSpeculationPoison() {
if (info()->called_with_code_start_register()) {
tasm()->RecordComment("-- Prologue: generate speculation poison --");
GenerateSpeculationPoisonFromCodeStartRegister();
- if (info()->is_poisoning_register_arguments()) {
+ if (info()->poison_register_arguments()) {
AssembleRegisterArgumentPoisoning();
}
} else {
diff --git a/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc
index c673458c753..f5a69eec3ea 100644
--- a/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc
+++ b/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -2032,6 +2032,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Maxpd(dst, dst, i.InputSimd128Register(1));
break;
}
+ case kIA32F64x2Round: {
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
+ break;
+ }
case kIA32I64x2SplatI32Pair: {
XMMRegister dst = i.OutputSimd128Register();
__ Pinsrd(dst, i.InputRegister(0), 0);
@@ -2442,6 +2448,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Maxps(dst, dst, i.InputSimd128Register(1));
break;
}
+ case kIA32F32x4Round: {
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
+ break;
+ }
case kIA32I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
@@ -2795,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
+ case kIA32I32x4DotI16x8S: {
+ __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
case kIA32I16x8Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
@@ -3687,7 +3704,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Out-of-range indices should return 0, add 112 so that any value > 15
// saturates to 128 (top bit set), so pshufb will zero that lane.
- __ Move(mask, (uint32_t)0x70707070);
+ __ Move(mask, uint32_t{0x70707070});
__ Pshufd(mask, mask, 0x0);
__ Paddusb(mask, i.InputSimd128Register(1));
__ Pshufb(dst, mask);
@@ -4094,9 +4111,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpor(dst, dst, kScratchDoubleReg);
break;
}
- case kIA32S1x4AnyTrue:
- case kIA32S1x8AnyTrue:
- case kIA32S1x16AnyTrue: {
+ case kIA32V32x4AnyTrue:
+ case kIA32V16x8AnyTrue:
+ case kIA32V8x16AnyTrue: {
Register dst = i.OutputRegister();
XMMRegister src = i.InputSimd128Register(0);
Register tmp = i.TempRegister(0);
@@ -4110,13 +4127,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
// 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
// respectively.
- case kIA32S1x4AllTrue:
+ case kIA32V32x4AllTrue:
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
break;
- case kIA32S1x8AllTrue:
+ case kIA32V16x8AllTrue:
ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw);
break;
- case kIA32S1x16AllTrue: {
+ case kIA32V8x16AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
break;
}
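
The new kIA32F64x2Round/kIA32F32x4Round cases above read the rounding mode back out of the instruction code with MiscField::decode, and the selector (further down, in instruction-selector-ia32.cc) packs it in with MiscField::encode. A generic sketch of that encode/decode pattern follows; the bit positions and constants are assumptions for illustration, not V8's actual InstructionCode layout:

#include <cstdint>
#include <cstdio>

enum RoundingMode { kRoundToNearest, kRoundDown, kRoundUp, kRoundToZero };

// Assumption: the mode lives in bits 22..23 of a 32-bit opcode word.
constexpr uint32_t kMiscShift = 22;
constexpr uint32_t kMiscMask = 0x3;

constexpr uint32_t Encode(uint32_t arch_opcode, RoundingMode mode) {
  return arch_opcode | (static_cast<uint32_t>(mode) << kMiscShift);
}
constexpr RoundingMode Decode(uint32_t instruction_code) {
  return static_cast<RoundingMode>((instruction_code >> kMiscShift) & kMiscMask);
}

int main() {
  // Selector side: fold the mode into a (hypothetical) arch opcode value.
  uint32_t code = Encode(/*hypothetical kIA32F32x4Round=*/0x5a, kRoundUp);
  // Code generator side: recover the mode; prints 2 (kRoundUp).
  std::printf("mode = %d\n", Decode(code));
  return 0;
}
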
diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h
index d347d672021..4c49539c4e9 100644
--- a/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h
+++ b/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -136,6 +136,7 @@ namespace compiler {
V(IA32F64x2Le) \
V(IA32F64x2Pmin) \
V(IA32F64x2Pmax) \
+ V(IA32F64x2Round) \
V(IA32I64x2SplatI32Pair) \
V(IA32I64x2ReplaceLaneI32Pair) \
V(IA32I64x2Neg) \
@@ -186,6 +187,7 @@ namespace compiler {
V(AVXF32x4Le) \
V(IA32F32x4Pmin) \
V(IA32F32x4Pmax) \
+ V(IA32F32x4Round) \
V(IA32I32x4Splat) \
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
@@ -232,6 +234,7 @@ namespace compiler {
V(AVXI32x4GeU) \
V(IA32I32x4Abs) \
V(IA32I32x4BitMask) \
+ V(IA32I32x4DotI16x8S) \
V(IA32I16x8Splat) \
V(IA32I16x8ExtractLaneU) \
V(IA32I16x8ExtractLaneS) \
@@ -396,12 +399,12 @@ namespace compiler {
V(AVXS8x4Reverse) \
V(SSES8x2Reverse) \
V(AVXS8x2Reverse) \
- V(IA32S1x4AnyTrue) \
- V(IA32S1x4AllTrue) \
- V(IA32S1x8AnyTrue) \
- V(IA32S1x8AllTrue) \
- V(IA32S1x16AnyTrue) \
- V(IA32S1x16AllTrue) \
+ V(IA32V32x4AnyTrue) \
+ V(IA32V32x4AllTrue) \
+ V(IA32V16x8AnyTrue) \
+ V(IA32V16x8AllTrue) \
+ V(IA32V8x16AnyTrue) \
+ V(IA32V8x16AllTrue) \
V(IA32Word32AtomicPairLoad) \
V(IA32Word32AtomicPairStore) \
V(IA32Word32AtomicPairAdd) \
diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
index 52f0b0356ff..6d0062ba09e 100644
--- a/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
+++ b/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -117,6 +117,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F64x2Le:
case kIA32F64x2Pmin:
case kIA32F64x2Pmax:
+ case kIA32F64x2Round:
case kIA32I64x2SplatI32Pair:
case kIA32I64x2ReplaceLaneI32Pair:
case kIA32I64x2Neg:
@@ -167,6 +168,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Le:
case kIA32F32x4Pmin:
case kIA32F32x4Pmax:
+ case kIA32F32x4Round:
case kIA32I32x4Splat:
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
@@ -213,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4GeU:
case kIA32I32x4Abs:
case kIA32I32x4BitMask:
+ case kIA32I32x4DotI16x8S:
case kIA32I16x8Splat:
case kIA32I16x8ExtractLaneU:
case kIA32I16x8ExtractLaneS:
@@ -367,12 +370,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXS8x4Reverse:
case kSSES8x2Reverse:
case kAVXS8x2Reverse:
- case kIA32S1x4AnyTrue:
- case kIA32S1x4AllTrue:
- case kIA32S1x8AnyTrue:
- case kIA32S1x8AllTrue:
- case kIA32S1x16AnyTrue:
- case kIA32S1x16AllTrue:
+ case kIA32V32x4AnyTrue:
+ case kIA32V32x4AllTrue:
+ case kIA32V16x8AnyTrue:
+ case kIA32V16x8AllTrue:
+ case kIA32V8x16AnyTrue:
+ case kIA32V8x16AllTrue:
return (instr->addressing_mode() == kMode_None)
? kNoOpcodeFlags
: kIsLoadOperation | kHasSideEffect;
diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc
index c50464f4b86..5ed7c24e6bf 100644
--- a/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc
+++ b/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node,
}
}
+// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
+// a register as we don't have memory alignment yet. For AVX, memory operands
+// are fine, but can have performance issues if not aligned to 16/32 bytes
+// (based on load size), see SDM Vol 1, chapter 14.9
+void VisitRROSimd(InstructionSelector* selector, Node* node,
+ ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+ IA32OperandGenerator g(selector);
+ InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+ if (selector->IsSupported(AVX)) {
+ selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
+ g.Use(node->InputAt(1)));
+ } else {
+ selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
+ g.UseRegister(node->InputAt(1)));
+ }
+}
+
void VisitRRISimd(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
IA32OperandGenerator g(selector);
@@ -941,7 +958,16 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
- V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest))
+ V(Float64RoundTiesEven, \
+ kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \
+ V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
+ V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
+ V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
+ V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
+ V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
+ V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
+ V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
+ V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest))
#define RRO_FLOAT_OP_LIST(V) \
V(Float32Add, kAVXFloat32Add, kSSEFloat32Add) \
@@ -2100,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
V(I64x2Add) \
V(I64x2Sub) \
+ V(I32x4DotI16x8S) \
V(I16x8RoundingAverageU) \
V(I8x16RoundingAverageU)
@@ -2131,14 +2158,14 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(S128Not)
#define SIMD_ANYTRUE_LIST(V) \
- V(S1x4AnyTrue) \
- V(S1x8AnyTrue) \
- V(S1x16AnyTrue)
+ V(V32x4AnyTrue) \
+ V(V16x8AnyTrue) \
+ V(V8x16AnyTrue)
#define SIMD_ALLTRUE_LIST(V) \
- V(S1x4AllTrue) \
- V(S1x8AllTrue) \
- V(S1x16AllTrue)
+ V(V32x4AllTrue) \
+ V(V16x8AllTrue) \
+ V(V8x16AllTrue)
#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
V(I64x2Shl) \
@@ -2372,10 +2399,15 @@ SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
#undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX
-#define VISIT_SIMD_UNOP(Opcode) \
- void InstructionSelector::Visit##Opcode(Node* node) { \
- IA32OperandGenerator g(this); \
- Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
+// TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory
+// alignment yet. For AVX, memory operands are fine, but can have performance
+// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
+// chapter 14.9
+#define VISIT_SIMD_UNOP(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ IA32OperandGenerator g(this); \
+ Emit(kIA32##Opcode, g.DefineAsRegister(node), \
+ g.UseRegister(node->InputAt(0))); \
}
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
#undef VISIT_SIMD_UNOP
@@ -2407,23 +2439,23 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
IA32OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
Emit(kIA32##Opcode, g.DefineAsRegister(node), \
- g.UseUnique(node->InputAt(0)), arraysize(temps), temps); \
+ g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
}
SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
#undef VISIT_SIMD_ALLTRUE
#undef SIMD_ALLTRUE_LIST
-#define VISIT_SIMD_BINOP(Opcode) \
- void InstructionSelector::Visit##Opcode(Node* node) { \
- VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \
+#define VISIT_SIMD_BINOP(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
}
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP
#undef SIMD_BINOP_LIST
-#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
- void InstructionSelector::Visit##Opcode(Node* node) { \
- VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \
+#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
}
SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
diff --git a/chromium/v8/src/compiler/backend/instruction-selector-impl.h b/chromium/v8/src/compiler/backend/instruction-selector-impl.h
index aa7da85e42b..7e1f183fb71 100644
--- a/chromium/v8/src/compiler/backend/instruction-selector-impl.h
+++ b/chromium/v8/src/compiler/backend/instruction-selector-impl.h
@@ -356,6 +356,8 @@ class OperandGenerator {
case MachineRepresentation::kCompressed:
case MachineRepresentation::kCompressedPointer:
return Constant(static_cast<int32_t>(0));
+ case MachineRepresentation::kWord64:
+ return Constant(static_cast<int64_t>(0));
case MachineRepresentation::kFloat64:
return Constant(static_cast<double>(0));
case MachineRepresentation::kFloat32:
diff --git a/chromium/v8/src/compiler/backend/instruction-selector.cc b/chromium/v8/src/compiler/backend/instruction-selector.cc
index c2022b574ee..8ad88b946b4 100644
--- a/chromium/v8/src/compiler/backend/instruction-selector.cc
+++ b/chromium/v8/src/compiler/backend/instruction-selector.cc
@@ -1043,7 +1043,8 @@ void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
InstructionOperand op = g.UseLocation(*iter, location);
UnallocatedOperand unallocated = UnallocatedOperand::cast(op);
if (unallocated.HasFixedSlotPolicy() && !call_tail) {
- int stack_index = -unallocated.fixed_slot_index() - 1;
+ int stack_index = buffer->descriptor->GetStackIndexFromSlot(
+ unallocated.fixed_slot_index());
// This can insert empty slots before stack_index and will insert enough
// slots after stack_index to store the parameter.
if (static_cast<size_t>(stack_index) >= buffer->pushed_nodes.size()) {
@@ -1888,6 +1889,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Pmin(node);
case IrOpcode::kF64x2Pmax:
return MarkAsSimd128(node), VisitF64x2Pmax(node);
+ case IrOpcode::kF64x2Ceil:
+ return MarkAsSimd128(node), VisitF64x2Ceil(node);
+ case IrOpcode::kF64x2Floor:
+ return MarkAsSimd128(node), VisitF64x2Floor(node);
+ case IrOpcode::kF64x2Trunc:
+ return MarkAsSimd128(node), VisitF64x2Trunc(node);
+ case IrOpcode::kF64x2NearestInt:
+ return MarkAsSimd128(node), VisitF64x2NearestInt(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
@@ -1938,6 +1947,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Pmin(node);
case IrOpcode::kF32x4Pmax:
return MarkAsSimd128(node), VisitF32x4Pmax(node);
+ case IrOpcode::kF32x4Ceil:
+ return MarkAsSimd128(node), VisitF32x4Ceil(node);
+ case IrOpcode::kF32x4Floor:
+ return MarkAsSimd128(node), VisitF32x4Floor(node);
+ case IrOpcode::kF32x4Trunc:
+ return MarkAsSimd128(node), VisitF32x4Trunc(node);
+ case IrOpcode::kF32x4NearestInt:
+ return MarkAsSimd128(node), VisitF32x4NearestInt(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2SplatI32Pair:
@@ -2040,6 +2057,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4Abs(node);
case IrOpcode::kI32x4BitMask:
return MarkAsWord32(node), VisitI32x4BitMask(node);
+ case IrOpcode::kI32x4DotI16x8S:
+ return MarkAsSimd128(node), VisitI32x4DotI16x8S(node);
case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU:
@@ -2188,22 +2207,22 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitS8x16Swizzle(node);
case IrOpcode::kS8x16Shuffle:
return MarkAsSimd128(node), VisitS8x16Shuffle(node);
- case IrOpcode::kS1x2AnyTrue:
- return MarkAsWord32(node), VisitS1x2AnyTrue(node);
- case IrOpcode::kS1x2AllTrue:
- return MarkAsWord32(node), VisitS1x2AllTrue(node);
- case IrOpcode::kS1x4AnyTrue:
- return MarkAsWord32(node), VisitS1x4AnyTrue(node);
- case IrOpcode::kS1x4AllTrue:
- return MarkAsWord32(node), VisitS1x4AllTrue(node);
- case IrOpcode::kS1x8AnyTrue:
- return MarkAsWord32(node), VisitS1x8AnyTrue(node);
- case IrOpcode::kS1x8AllTrue:
- return MarkAsWord32(node), VisitS1x8AllTrue(node);
- case IrOpcode::kS1x16AnyTrue:
- return MarkAsWord32(node), VisitS1x16AnyTrue(node);
- case IrOpcode::kS1x16AllTrue:
- return MarkAsWord32(node), VisitS1x16AllTrue(node);
+ case IrOpcode::kV64x2AnyTrue:
+ return MarkAsWord32(node), VisitV64x2AnyTrue(node);
+ case IrOpcode::kV64x2AllTrue:
+ return MarkAsWord32(node), VisitV64x2AllTrue(node);
+ case IrOpcode::kV32x4AnyTrue:
+ return MarkAsWord32(node), VisitV32x4AnyTrue(node);
+ case IrOpcode::kV32x4AllTrue:
+ return MarkAsWord32(node), VisitV32x4AllTrue(node);
+ case IrOpcode::kV16x8AnyTrue:
+ return MarkAsWord32(node), VisitV16x8AnyTrue(node);
+ case IrOpcode::kV16x8AllTrue:
+ return MarkAsWord32(node), VisitV16x8AllTrue(node);
+ case IrOpcode::kV8x16AnyTrue:
+ return MarkAsWord32(node), VisitV8x16AnyTrue(node);
+ case IrOpcode::kV8x16AllTrue:
+ return MarkAsWord32(node), VisitV8x16AllTrue(node);
default:
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
node->op()->mnemonic(), node->id());
@@ -2638,8 +2657,8 @@ void InstructionSelector::VisitI64x2GtS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GtU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2GeU(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS1x2AnyTrue(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS1x2AllTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV64x2AnyTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV64x2AllTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
@@ -2651,23 +2670,45 @@ void InstructionSelector::VisitI64x2MinU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2MaxU(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
+#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \
+ !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_MIPS && \
+ !V8_TARGET_ARCH_MIPS64
// TODO(v8:10308) Bitmask operations are in prototype now, we can remove these
// guards when they go into the proposal.
-#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32 && \
- !V8_TARGET_ARCH_X64
void InstructionSelector::VisitI8x16BitMask(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI16x8BitMask(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
-#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
- // && !V8_TARGET_ARCH_X64
-
// TODO(v8:10501) Prototyping pmin and pmax instructions.
-#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
-#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
+#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
+ // && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X &&
+ // !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
+
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X && \
+ !V8_TARGET_ARCH_IA32
+// TODO(v8:10553) Prototyping floating point rounding instructions.
+// TODO(zhin): Temporary convoluted way to handle unimplemented opcodes on ARM
+// as we are implementing them one at a time.
+#if !V8_TARGET_ARCH_ARM
+void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
+#endif // !V8_TARGET_ARCH_ARM
+void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
+#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
+ // && !V8_TARGET_ARCH_IA32
+
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
+// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
+void InstructionSelector::VisitI32x4DotI16x8S(Node* node) { UNIMPLEMENTED(); }
+#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
@@ -2808,7 +2849,7 @@ void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) {
switch (call_descriptor->kind()) {
case CallDescriptor::kCallAddress: {
int misc_field = static_cast<int>(call_descriptor->ParameterCount());
-#if defined(_AIX)
+#if ABI_USES_FUNCTION_DESCRIPTORS
// Highest misc_field bit is used on AIX to indicate if a CFunction call
// has function descriptor or not.
if (!call_descriptor->NoFunctionDescriptor()) {
@@ -3038,7 +3079,7 @@ void InstructionSelector::VisitUnreachable(Node* node) {
void InstructionSelector::VisitStaticAssert(Node* node) {
Node* asserted = node->InputAt(0);
- asserted->Print(2);
+ asserted->Print(4);
FATAL("Expected turbofan static assert to hold, but got non-true input!\n");
}
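On the kCallAddress hunk above: the guard moves from a bare _AIX check to ABI_USES_FUNCTION_DESCRIPTORS, so any ABI with function descriptors gets the extra flag folded into misc_field. A minimal sketch of that encode/decode pairing follows; the constant names echo the PPC code-generator hunk later in this diff, but the concrete shift value here is an assumption made only for illustration.

// Illustration only: the real constants live in the PPC/AIX port headers, and
// the shift value below is assumed, not taken from the source.
constexpr int kHasFunctionDescriptorBitShift = 9;  // assumed
constexpr int kHasFunctionDescriptorBitMask = 1 << kHasFunctionDescriptorBitShift;

// Instruction-selector side: fold the flag into the parameter-count field.
int EncodeMiscField(int parameter_count, bool has_function_descriptor) {
  int misc_field = parameter_count;
  if (has_function_descriptor) misc_field |= kHasFunctionDescriptorBitMask;
  return misc_field;
}

// Code-generator side (mirrors kNumParametersMask = kHasFunctionDescriptorBitMask - 1).
int DecodeParameterCount(int misc_field) {
  return misc_field & (kHasFunctionDescriptorBitMask - 1);
}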
diff --git a/chromium/v8/src/compiler/backend/instruction.h b/chromium/v8/src/compiler/backend/instruction.h
index e189100c346..f40a4198f81 100644
--- a/chromium/v8/src/compiler/backend/instruction.h
+++ b/chromium/v8/src/compiler/backend/instruction.h
@@ -1536,7 +1536,7 @@ class V8_EXPORT_PRIVATE InstructionSequence final
return virtual_register;
}
Constant GetConstant(int virtual_register) const {
- ConstantMap::const_iterator it = constants_.find(virtual_register);
+ auto it = constants_.find(virtual_register);
DCHECK(it != constants_.end());
DCHECK_EQ(virtual_register, it->first);
return it->second;
diff --git a/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc b/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc
index c83a4e28ee1..b9c1eb11d92 100644
--- a/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc
+++ b/chromium/v8/src/compiler/backend/mips/code-generator-mips.cc
@@ -2159,6 +2159,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ insert_w(dst, i.InputInt8(1) * 2 + 1, kScratchReg);
break;
}
+ case kMipsF64x2Pmin: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = rhs < lhs ? rhs : lhs
+ __ fclt_d(dst, rhs, lhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
+ case kMipsF64x2Pmax: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = lhs < rhs ? rhs : lhs
+ __ fclt_d(dst, lhs, rhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
case kMipsI64x2Add: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ addv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
@@ -2395,6 +2415,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
+ case kMipsF32x4Pmin: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = rhs < lhs ? rhs : lhs
+ __ fclt_w(dst, rhs, lhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
+ case kMipsF32x4Pmax: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = lhs < rhs ? rhs : lhs
+ __ fclt_w(dst, lhs, rhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
case kMipsI32x4SConvertF32x4: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
@@ -2442,6 +2482,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero);
break;
}
+ case kMipsI32x4BitMask: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Register dst = i.OutputRegister();
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register scratch0 = kSimd128RegZero;
+ Simd128Register scratch1 = kSimd128ScratchReg;
+ __ srli_w(scratch0, src, 31);
+ __ srli_d(scratch1, scratch0, 31);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ shf_w(scratch1, scratch0, 0x0E);
+ __ slli_d(scratch1, scratch1, 2);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ copy_u_b(dst, scratch0, 0);
+ break;
+ }
case kMipsI16x8Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_h(i.OutputSimd128Register(), i.InputRegister(0));
@@ -2609,6 +2664,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero);
break;
}
+ case kMipsI16x8BitMask: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Register dst = i.OutputRegister();
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register scratch0 = kSimd128RegZero;
+ Simd128Register scratch1 = kSimd128ScratchReg;
+ __ srli_h(scratch0, src, 15);
+ __ srli_w(scratch1, scratch0, 15);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ srli_d(scratch1, scratch0, 30);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ shf_w(scratch1, scratch0, 0x0E);
+ __ slli_d(scratch1, scratch1, 4);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ copy_u_b(dst, scratch0, 0);
+ break;
+ }
case kMipsI8x16Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
@@ -2776,6 +2848,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero);
break;
}
+ case kMipsI8x16BitMask: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Register dst = i.OutputRegister();
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register scratch0 = kSimd128RegZero;
+ Simd128Register scratch1 = kSimd128ScratchReg;
+ __ srli_b(scratch0, src, 7);
+ __ srli_h(scratch1, scratch0, 7);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ srli_w(scratch1, scratch0, 14);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ srli_d(scratch1, scratch0, 28);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ shf_w(scratch1, scratch0, 0x0E);
+ __ ilvev_b(scratch0, scratch1, scratch0);
+ __ copy_u_h(dst, scratch0, 0);
+ break;
+ }
case kMipsS128And: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
@@ -2800,9 +2890,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0));
break;
}
- case kMipsS1x4AnyTrue:
- case kMipsS1x8AnyTrue:
- case kMipsS1x16AnyTrue: {
+ case kMipsV32x4AnyTrue:
+ case kMipsV16x8AnyTrue:
+ case kMipsV8x16AnyTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_false;
@@ -2814,7 +2904,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(&all_false);
break;
}
- case kMipsS1x4AllTrue: {
+ case kMipsV32x4AllTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_true;
@@ -2825,7 +2915,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(&all_true);
break;
}
- case kMipsS1x8AllTrue: {
+ case kMipsV16x8AllTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_true;
@@ -2836,7 +2926,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(&all_true);
break;
}
- case kMipsS1x16AllTrue: {
+ case kMipsV8x16AllTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_true;
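The new kMipsI32x4BitMask / I16x8BitMask / I8x16BitMask sequences above funnel the sign bit of every lane into the low bits of a general-purpose register via shift/or chains. The sketch below is only a scalar restatement of the intended result (the wasm bitmask semantics), useful as a mental model when checking the shift amounts; it is not the MSA implementation.

#include <cstdint>

// Bit i of the result is the sign bit of lane i.
uint32_t I32x4BitMaskRef(const int32_t lanes[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) mask |= (lanes[i] < 0 ? 1u : 0u) << i;
  return mask;
}

uint32_t I16x8BitMaskRef(const int16_t lanes[8]) {
  uint32_t mask = 0;
  for (int i = 0; i < 8; ++i) mask |= (lanes[i] < 0 ? 1u : 0u) << i;
  return mask;
}

uint32_t I8x16BitMaskRef(const int8_t lanes[16]) {
  uint32_t mask = 0;
  for (int i = 0; i < 16; ++i) mask |= (lanes[i] < 0 ? 1u : 0u) << i;
  return mask;
}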
diff --git a/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h b/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h
index 0a37dd70683..27418935dd3 100644
--- a/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h
+++ b/chromium/v8/src/compiler/backend/mips/instruction-codes-mips.h
@@ -155,6 +155,8 @@ namespace compiler {
V(MipsF64x2Ne) \
V(MipsF64x2Lt) \
V(MipsF64x2Le) \
+ V(MipsF64x2Pmin) \
+ V(MipsF64x2Pmax) \
V(MipsI64x2Add) \
V(MipsI64x2Sub) \
V(MipsI64x2Mul) \
@@ -196,6 +198,8 @@ namespace compiler {
V(MipsF32x4Ne) \
V(MipsF32x4Lt) \
V(MipsF32x4Le) \
+ V(MipsF32x4Pmin) \
+ V(MipsF32x4Pmax) \
V(MipsI32x4SConvertF32x4) \
V(MipsI32x4UConvertF32x4) \
V(MipsI32x4Neg) \
@@ -204,6 +208,7 @@ namespace compiler {
V(MipsI32x4GtU) \
V(MipsI32x4GeU) \
V(MipsI32x4Abs) \
+ V(MipsI32x4BitMask) \
V(MipsI16x8Splat) \
V(MipsI16x8ExtractLaneU) \
V(MipsI16x8ExtractLaneS) \
@@ -232,6 +237,7 @@ namespace compiler {
V(MipsI16x8GeU) \
V(MipsI16x8RoundingAverageU) \
V(MipsI16x8Abs) \
+ V(MipsI16x8BitMask) \
V(MipsI8x16Splat) \
V(MipsI8x16ExtractLaneU) \
V(MipsI8x16ExtractLaneS) \
@@ -259,18 +265,19 @@ namespace compiler {
V(MipsI8x16GeU) \
V(MipsI8x16RoundingAverageU) \
V(MipsI8x16Abs) \
+ V(MipsI8x16BitMask) \
V(MipsS128And) \
V(MipsS128Or) \
V(MipsS128Xor) \
V(MipsS128Not) \
V(MipsS128Select) \
V(MipsS128AndNot) \
- V(MipsS1x4AnyTrue) \
- V(MipsS1x4AllTrue) \
- V(MipsS1x8AnyTrue) \
- V(MipsS1x8AllTrue) \
- V(MipsS1x16AnyTrue) \
- V(MipsS1x16AllTrue) \
+ V(MipsV32x4AnyTrue) \
+ V(MipsV32x4AllTrue) \
+ V(MipsV16x8AnyTrue) \
+ V(MipsV16x8AllTrue) \
+ V(MipsV8x16AnyTrue) \
+ V(MipsV8x16AllTrue) \
V(MipsS32x4InterleaveRight) \
V(MipsS32x4InterleaveLeft) \
V(MipsS32x4PackEven) \
diff --git a/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc b/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc
index 81bbfbbfb9b..5180a1d4ed0 100644
--- a/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc
+++ b/chromium/v8/src/compiler/backend/mips/instruction-scheduler-mips.cc
@@ -57,6 +57,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsF64x2Splat:
case kMipsF64x2ExtractLane:
case kMipsF64x2ReplaceLane:
+ case kMipsF64x2Pmin:
+ case kMipsF64x2Pmax:
case kMipsI64x2Add:
case kMipsI64x2Sub:
case kMipsI64x2Mul:
@@ -85,6 +87,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsF32x4Splat:
case kMipsF32x4Sub:
case kMipsF32x4UConvertI32x4:
+ case kMipsF32x4Pmin:
+ case kMipsF32x4Pmax:
case kMipsFloat32Max:
case kMipsFloat32Min:
case kMipsFloat32RoundDown:
@@ -138,6 +142,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI16x8UConvertI8x16High:
case kMipsI16x8UConvertI8x16Low:
case kMipsI16x8Abs:
+ case kMipsI16x8BitMask:
case kMipsI32x4Add:
case kMipsI32x4AddHoriz:
case kMipsI32x4Eq:
@@ -166,6 +171,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI32x4UConvertI16x8High:
case kMipsI32x4UConvertI16x8Low:
case kMipsI32x4Abs:
+ case kMipsI32x4BitMask:
case kMipsI8x16Add:
case kMipsI8x16AddSaturateS:
case kMipsI8x16AddSaturateU:
@@ -195,6 +201,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI8x16SubSaturateU:
case kMipsI8x16UConvertI16x8:
case kMipsI8x16Abs:
+ case kMipsI8x16BitMask:
case kMipsIns:
case kMipsLsa:
case kMipsMaddD:
@@ -238,12 +245,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsS16x8InterleaveRight:
case kMipsS16x8PackEven:
case kMipsS16x8PackOdd:
- case kMipsS1x16AllTrue:
- case kMipsS1x16AnyTrue:
- case kMipsS1x4AllTrue:
- case kMipsS1x4AnyTrue:
- case kMipsS1x8AllTrue:
- case kMipsS1x8AnyTrue:
+ case kMipsV8x16AllTrue:
+ case kMipsV8x16AnyTrue:
+ case kMipsV32x4AllTrue:
+ case kMipsV32x4AnyTrue:
+ case kMipsV16x8AllTrue:
+ case kMipsV16x8AnyTrue:
case kMipsS32x4InterleaveEven:
case kMipsS32x4InterleaveLeft:
case kMipsS32x4InterleaveOdd:
diff --git a/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc b/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc
index dac94fae272..2785186b827 100644
--- a/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc
+++ b/chromium/v8/src/compiler/backend/mips/instruction-selector-mips.cc
@@ -113,6 +113,14 @@ static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseRegister(node->InputAt(1)));
}
+static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ MipsOperandGenerator g(selector);
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)));
+}
+
void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
MipsOperandGenerator g(selector);
selector->Emit(
@@ -2111,12 +2119,12 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8UConvertI8x16High, kMipsI16x8UConvertI8x16High) \
V(I8x16Neg, kMipsI8x16Neg) \
V(S128Not, kMipsS128Not) \
- V(S1x4AnyTrue, kMipsS1x4AnyTrue) \
- V(S1x4AllTrue, kMipsS1x4AllTrue) \
- V(S1x8AnyTrue, kMipsS1x8AnyTrue) \
- V(S1x8AllTrue, kMipsS1x8AllTrue) \
- V(S1x16AnyTrue, kMipsS1x16AnyTrue) \
- V(S1x16AllTrue, kMipsS1x16AllTrue)
+ V(V32x4AnyTrue, kMipsV32x4AnyTrue) \
+ V(V32x4AllTrue, kMipsV32x4AllTrue) \
+ V(V16x8AnyTrue, kMipsV16x8AnyTrue) \
+ V(V16x8AllTrue, kMipsV16x8AllTrue) \
+ V(V8x16AnyTrue, kMipsV8x16AnyTrue) \
+ V(V8x16AllTrue, kMipsV8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
V(I64x2Shl) \
@@ -2172,6 +2180,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4GtU, kMipsI32x4GtU) \
V(I32x4GeU, kMipsI32x4GeU) \
V(I32x4Abs, kMipsI32x4Abs) \
+ V(I32x4BitMask, kMipsI32x4BitMask) \
V(I16x8Add, kMipsI16x8Add) \
V(I16x8AddSaturateS, kMipsI16x8AddSaturateS) \
V(I16x8AddSaturateU, kMipsI16x8AddSaturateU) \
@@ -2194,6 +2203,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \
V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \
V(I16x8Abs, kMipsI16x8Abs) \
+ V(I16x8BitMask, kMipsI16x8BitMask) \
V(I8x16Add, kMipsI8x16Add) \
V(I8x16AddSaturateS, kMipsI8x16AddSaturateS) \
V(I8x16AddSaturateU, kMipsI8x16AddSaturateU) \
@@ -2215,6 +2225,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8) \
V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8) \
V(I8x16Abs, kMipsI8x16Abs) \
+ V(I8x16BitMask, kMipsI8x16BitMask) \
V(S128And, kMipsS128And) \
V(S128Or, kMipsS128Or) \
V(S128Xor, kMipsS128Xor) \
@@ -2406,6 +2417,22 @@ void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
Emit(kMipsSeh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
}
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+ VisitUniqueRRR(this, kMipsF32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+ VisitUniqueRRR(this, kMipsF32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+ VisitUniqueRRR(this, kMipsF64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+ VisitUniqueRRR(this, kMipsF64x2Pmax, node);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
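The Pmin/Pmax visitors just above go through VisitUniqueRRR rather than VisitRRR, presumably so that neither input can be allocated to the same register as the output: the code-generator sequences write the fclt_* mask into the destination first and then still read both original inputs in bsel_v. Semantically, the fclt/bsel pairs implement the pseudo-min/max from the inline comments ("dst = rhs < lhs ? rhs : lhs" and "dst = lhs < rhs ? rhs : lhs"); the scalar sketch below restates that, and follows the ordinary C++ comparison rather than IEEE min/max for NaN and signed zero.

// Scalar restatement of the pmin/pmax comments in the code-generator hunk.
template <typename T>
T PseudoMin(T lhs, T rhs) {
  return rhs < lhs ? rhs : lhs;  // Any comparison with NaN is false, so lhs is kept.
}

template <typename T>
T PseudoMax(T lhs, T rhs) {
  return lhs < rhs ? rhs : lhs;  // Any comparison with NaN is false, so lhs is kept.
}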
diff --git a/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc b/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc
index 197167c01cd..9acd6459de5 100644
--- a/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc
+++ b/chromium/v8/src/compiler/backend/mips64/code-generator-mips64.cc
@@ -2265,6 +2265,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputInt8(1));
break;
}
+ case kMips64F64x2Pmin: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = rhs < lhs ? rhs : lhs
+ __ fclt_d(dst, rhs, lhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
+ case kMips64F64x2Pmax: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = lhs < rhs ? rhs : lhs
+ __ fclt_d(dst, lhs, rhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
case kMips64I64x2ReplaceLane: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Simd128Register src = i.InputSimd128Register(0);
@@ -2581,6 +2601,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
+ case kMips64F32x4Pmin: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = rhs < lhs ? rhs : lhs
+ __ fclt_w(dst, rhs, lhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
+ case kMips64F32x4Pmax: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register lhs = i.InputSimd128Register(0);
+ Simd128Register rhs = i.InputSimd128Register(1);
+ // dst = lhs < rhs ? rhs : lhs
+ __ fclt_w(dst, lhs, rhs);
+ __ bsel_v(dst, lhs, rhs);
+ break;
+ }
case kMips64I32x4SConvertF32x4: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
@@ -2634,6 +2674,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero);
break;
}
+ case kMips64I32x4BitMask: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Register dst = i.OutputRegister();
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register scratch0 = kSimd128RegZero;
+ Simd128Register scratch1 = kSimd128ScratchReg;
+ __ srli_w(scratch0, src, 31);
+ __ srli_d(scratch1, scratch0, 31);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ shf_w(scratch1, scratch0, 0x0E);
+ __ slli_d(scratch1, scratch1, 2);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ copy_u_b(dst, scratch0, 0);
+ break;
+ }
case kMips64I16x8Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_h(i.OutputSimd128Register(), i.InputRegister(0));
@@ -2820,6 +2875,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero);
break;
}
+ case kMips64I16x8BitMask: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Register dst = i.OutputRegister();
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register scratch0 = kSimd128RegZero;
+ Simd128Register scratch1 = kSimd128ScratchReg;
+ __ srli_h(scratch0, src, 15);
+ __ srli_w(scratch1, scratch0, 15);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ srli_d(scratch1, scratch0, 30);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ shf_w(scratch1, scratch0, 0x0E);
+ __ slli_d(scratch1, scratch1, 4);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ copy_u_b(dst, scratch0, 0);
+ break;
+ }
case kMips64I8x16Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
@@ -3006,6 +3078,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero);
break;
}
+ case kMips64I8x16BitMask: {
+ CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+ Register dst = i.OutputRegister();
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register scratch0 = kSimd128RegZero;
+ Simd128Register scratch1 = kSimd128ScratchReg;
+ __ srli_b(scratch0, src, 7);
+ __ srli_h(scratch1, scratch0, 7);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ srli_w(scratch1, scratch0, 14);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ srli_d(scratch1, scratch0, 28);
+ __ or_v(scratch0, scratch0, scratch1);
+ __ shf_w(scratch1, scratch0, 0x0E);
+ __ ilvev_b(scratch0, scratch1, scratch0);
+ __ copy_u_h(dst, scratch0, 0);
+ break;
+ }
case kMips64S128And: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
@@ -3030,9 +3120,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0));
break;
}
- case kMips64S1x4AnyTrue:
- case kMips64S1x8AnyTrue:
- case kMips64S1x16AnyTrue: {
+ case kMips64V32x4AnyTrue:
+ case kMips64V16x8AnyTrue:
+ case kMips64V8x16AnyTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_false;
@@ -3043,7 +3133,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(&all_false);
break;
}
- case kMips64S1x4AllTrue: {
+ case kMips64V32x4AllTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_true;
@@ -3054,7 +3144,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(&all_true);
break;
}
- case kMips64S1x8AllTrue: {
+ case kMips64V16x8AllTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_true;
@@ -3065,7 +3155,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bind(&all_true);
break;
}
- case kMips64S1x16AllTrue: {
+ case kMips64V8x16AllTrue: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Label all_true;
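Most of the remaining churn in this file is the mechanical S1x* to V64x2/V32x4/V16x8/V8x16 rename for the AnyTrue/AllTrue reductions. As a reminder of what those reductions compute, here is a scalar restatement of the wasm any_true/all_true semantics, shown for the 32x4 shape only.

#include <cstdint>

// any_true: 1 if at least one lane is non-zero.
int V32x4AnyTrueRef(const uint32_t lanes[4]) {
  for (int i = 0; i < 4; ++i) {
    if (lanes[i] != 0) return 1;
  }
  return 0;
}

// all_true: 1 only if every lane is non-zero.
int V32x4AllTrueRef(const uint32_t lanes[4]) {
  for (int i = 0; i < 4; ++i) {
    if (lanes[i] == 0) return 0;
  }
  return 1;
}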
diff --git a/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h b/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h
index 9303b4572f3..0c42c059ea5 100644
--- a/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h
+++ b/chromium/v8/src/compiler/backend/mips64/instruction-codes-mips64.h
@@ -203,6 +203,8 @@ namespace compiler {
V(Mips64F64x2Splat) \
V(Mips64F64x2ExtractLane) \
V(Mips64F64x2ReplaceLane) \
+ V(Mips64F64x2Pmin) \
+ V(Mips64F64x2Pmax) \
V(Mips64I64x2Splat) \
V(Mips64I64x2ExtractLane) \
V(Mips64I64x2ReplaceLane) \
@@ -229,6 +231,8 @@ namespace compiler {
V(Mips64F32x4Ne) \
V(Mips64F32x4Lt) \
V(Mips64F32x4Le) \
+ V(Mips64F32x4Pmin) \
+ V(Mips64F32x4Pmax) \
V(Mips64I32x4SConvertF32x4) \
V(Mips64I32x4UConvertF32x4) \
V(Mips64I32x4Neg) \
@@ -237,6 +241,7 @@ namespace compiler {
V(Mips64I32x4GtU) \
V(Mips64I32x4GeU) \
V(Mips64I32x4Abs) \
+ V(Mips64I32x4BitMask) \
V(Mips64I16x8Splat) \
V(Mips64I16x8ExtractLaneU) \
V(Mips64I16x8ExtractLaneS) \
@@ -265,6 +270,7 @@ namespace compiler {
V(Mips64I16x8GeU) \
V(Mips64I16x8RoundingAverageU) \
V(Mips64I16x8Abs) \
+ V(Mips64I16x8BitMask) \
V(Mips64I8x16Splat) \
V(Mips64I8x16ExtractLaneU) \
V(Mips64I8x16ExtractLaneS) \
@@ -292,18 +298,19 @@ namespace compiler {
V(Mips64I8x16GeU) \
V(Mips64I8x16RoundingAverageU) \
V(Mips64I8x16Abs) \
+ V(Mips64I8x16BitMask) \
V(Mips64S128And) \
V(Mips64S128Or) \
V(Mips64S128Xor) \
V(Mips64S128Not) \
V(Mips64S128Select) \
V(Mips64S128AndNot) \
- V(Mips64S1x4AnyTrue) \
- V(Mips64S1x4AllTrue) \
- V(Mips64S1x8AnyTrue) \
- V(Mips64S1x8AllTrue) \
- V(Mips64S1x16AnyTrue) \
- V(Mips64S1x16AllTrue) \
+ V(Mips64V32x4AnyTrue) \
+ V(Mips64V32x4AllTrue) \
+ V(Mips64V16x8AnyTrue) \
+ V(Mips64V16x8AllTrue) \
+ V(Mips64V8x16AnyTrue) \
+ V(Mips64V8x16AllTrue) \
V(Mips64S32x4InterleaveRight) \
V(Mips64S32x4InterleaveLeft) \
V(Mips64S32x4PackEven) \
diff --git a/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
index 81fc3b2ca9a..2f8a2722015 100644
--- a/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
+++ b/chromium/v8/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
@@ -82,6 +82,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64F64x2Ne:
case kMips64F64x2Lt:
case kMips64F64x2Le:
+ case kMips64F64x2Pmin:
+ case kMips64F64x2Pmax:
case kMips64I64x2Splat:
case kMips64I64x2ExtractLane:
case kMips64I64x2ReplaceLane:
@@ -113,6 +115,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64F32x4Splat:
case kMips64F32x4Sub:
case kMips64F32x4UConvertI32x4:
+ case kMips64F32x4Pmin:
+ case kMips64F32x4Pmax:
case kMips64F64x2Splat:
case kMips64F64x2ExtractLane:
case kMips64F64x2ReplaceLane:
@@ -171,6 +175,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I16x8UConvertI8x16Low:
case kMips64I16x8RoundingAverageU:
case kMips64I16x8Abs:
+ case kMips64I16x8BitMask:
case kMips64I32x4Add:
case kMips64I32x4AddHoriz:
case kMips64I32x4Eq:
@@ -199,6 +204,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I32x4UConvertI16x8High:
case kMips64I32x4UConvertI16x8Low:
case kMips64I32x4Abs:
+ case kMips64I32x4BitMask:
case kMips64I8x16Add:
case kMips64I8x16AddSaturateS:
case kMips64I8x16AddSaturateU:
@@ -226,6 +232,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I8x16SubSaturateU:
case kMips64I8x16RoundingAverageU:
case kMips64I8x16Abs:
+ case kMips64I8x16BitMask:
case kMips64Ins:
case kMips64Lsa:
case kMips64MaxD:
@@ -265,12 +272,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64S16x8PackOdd:
case kMips64S16x2Reverse:
case kMips64S16x4Reverse:
- case kMips64S1x16AllTrue:
- case kMips64S1x16AnyTrue:
- case kMips64S1x4AllTrue:
- case kMips64S1x4AnyTrue:
- case kMips64S1x8AllTrue:
- case kMips64S1x8AnyTrue:
+ case kMips64V8x16AllTrue:
+ case kMips64V8x16AnyTrue:
+ case kMips64V32x4AllTrue:
+ case kMips64V32x4AnyTrue:
+ case kMips64V16x8AllTrue:
+ case kMips64V16x8AnyTrue:
case kMips64S32x4InterleaveEven:
case kMips64S32x4InterleaveOdd:
case kMips64S32x4InterleaveLeft:
diff --git a/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc b/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc
index 719a916b6a5..2c9c8d439b6 100644
--- a/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc
+++ b/chromium/v8/src/compiler/backend/mips64/instruction-selector-mips64.cc
@@ -163,6 +163,14 @@ static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseRegister(node->InputAt(1)));
}
+static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode,
+ Node* node) {
+ Mips64OperandGenerator g(selector);
+ selector->Emit(opcode, g.DefineAsRegister(node),
+ g.UseUniqueRegister(node->InputAt(0)),
+ g.UseUniqueRegister(node->InputAt(1)));
+}
+
void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
Mips64OperandGenerator g(selector);
selector->Emit(
@@ -2778,21 +2786,24 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4UConvertI16x8Low, kMips64I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High, kMips64I32x4UConvertI16x8High) \
V(I32x4Abs, kMips64I32x4Abs) \
+ V(I32x4BitMask, kMips64I32x4BitMask) \
V(I16x8Neg, kMips64I16x8Neg) \
V(I16x8SConvertI8x16Low, kMips64I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High, kMips64I16x8SConvertI8x16High) \
V(I16x8UConvertI8x16Low, kMips64I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High, kMips64I16x8UConvertI8x16High) \
V(I16x8Abs, kMips64I16x8Abs) \
+ V(I16x8BitMask, kMips64I16x8BitMask) \
V(I8x16Neg, kMips64I8x16Neg) \
V(I8x16Abs, kMips64I8x16Abs) \
+ V(I8x16BitMask, kMips64I8x16BitMask) \
V(S128Not, kMips64S128Not) \
- V(S1x4AnyTrue, kMips64S1x4AnyTrue) \
- V(S1x4AllTrue, kMips64S1x4AllTrue) \
- V(S1x8AnyTrue, kMips64S1x8AnyTrue) \
- V(S1x8AllTrue, kMips64S1x8AllTrue) \
- V(S1x16AnyTrue, kMips64S1x16AnyTrue) \
- V(S1x16AllTrue, kMips64S1x16AllTrue)
+ V(V32x4AnyTrue, kMips64V32x4AnyTrue) \
+ V(V32x4AllTrue, kMips64V32x4AllTrue) \
+ V(V16x8AnyTrue, kMips64V16x8AnyTrue) \
+ V(V16x8AllTrue, kMips64V16x8AllTrue) \
+ V(V8x16AnyTrue, kMips64V8x16AnyTrue) \
+ V(V8x16AllTrue, kMips64V8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
V(I64x2Shl) \
@@ -3099,6 +3110,22 @@ void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
g.TempImmediate(0));
}
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+ VisitUniqueRRR(this, kMips64F32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+ VisitUniqueRRR(this, kMips64F32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+ VisitUniqueRRR(this, kMips64F64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+ VisitUniqueRRR(this, kMips64F64x2Pmax, node);
+}
+
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
diff --git a/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc b/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc
index b7fece3f72d..56c5003d2e8 100644
--- a/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc
+++ b/chromium/v8/src/compiler/backend/ppc/code-generator-ppc.cc
@@ -1039,7 +1039,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
int offset = (FLAG_enable_embedded_constant_pool ? 20 : 23) * kInstrSize;
-#if defined(_AIX)
+#if ABI_USES_FUNCTION_DESCRIPTORS
// AIX/PPC64BE Linux uses a function descriptor
int kNumParametersMask = kHasFunctionDescriptorBitMask - 1;
num_parameters = kNumParametersMask & misc_field;
@@ -2164,6 +2164,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsro(dst, dst, kScratchDoubleReg);
// reload
+ __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ mtvsrd(kScratchDoubleReg, r0);
__ vor(dst, dst, kScratchDoubleReg);
break;
@@ -2186,6 +2187,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
__ vsro(dst, dst, kScratchDoubleReg);
// reload
+ __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ mtvsrd(kScratchDoubleReg, src);
__ vor(dst, dst, kScratchDoubleReg);
break;
@@ -2208,46 +2210,709 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vspltb(dst, dst, Operand(7));
break;
}
+#define SHIFT_TO_CORRECT_LANE(starting_lane_number, lane_input,  \
+ lane_width_in_bytes, input_register) \
+  int shift_bits = abs(lane_input - starting_lane_number) *  \
+ lane_width_in_bytes * kBitsPerByte; \
+ if (shift_bits > 0) { \
+ __ li(ip, Operand(shift_bits)); \
+ __ mtvsrd(kScratchDoubleReg, ip); \
+ __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
+    if (lane_input < starting_lane_number) {  \
+ __ vsro(kScratchDoubleReg, input_register, kScratchDoubleReg); \
+ } else { \
+      DCHECK(lane_input > starting_lane_number);  \
+ __ vslo(kScratchDoubleReg, input_register, kScratchDoubleReg); \
+ } \
+ input_register = kScratchDoubleReg; \
+ }
case kPPC_F64x2ExtractLane: {
- __ mfvsrd(kScratchReg, i.InputSimd128Register(0));
+ int32_t lane = 1 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(0, lane, 8, src);
+ __ mfvsrd(kScratchReg, src);
__ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
break;
}
case kPPC_F32x4ExtractLane: {
- __ mfvsrwz(kScratchReg, i.InputSimd128Register(0));
+ int32_t lane = 3 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
+ __ mfvsrwz(kScratchReg, src);
__ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
break;
}
case kPPC_I64x2ExtractLane: {
- __ mfvsrd(i.OutputRegister(), i.InputSimd128Register(0));
+ int32_t lane = 1 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(0, lane, 8, src)
+ __ mfvsrd(i.OutputRegister(), src);
break;
}
case kPPC_I32x4ExtractLane: {
- __ mfvsrwz(i.OutputRegister(), i.InputSimd128Register(0));
+ int32_t lane = 3 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(1, lane, 4, src)
+ __ mfvsrwz(i.OutputRegister(), src);
break;
}
case kPPC_I16x8ExtractLaneU: {
- __ mfvsrwz(r0, i.InputSimd128Register(0));
+ int32_t lane = 7 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
+ __ mfvsrwz(r0, src);
__ li(ip, Operand(16));
__ srd(i.OutputRegister(), r0, ip);
break;
}
case kPPC_I16x8ExtractLaneS: {
- __ mfvsrwz(kScratchReg, i.InputSimd128Register(0));
+ int32_t lane = 7 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(2, lane, 2, src)
+ __ mfvsrwz(kScratchReg, src);
__ sradi(i.OutputRegister(), kScratchReg, 16);
break;
}
case kPPC_I8x16ExtractLaneU: {
- __ mfvsrwz(r0, i.InputSimd128Register(0));
+ int32_t lane = 15 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
+ __ mfvsrwz(r0, src);
__ li(ip, Operand(24));
__ srd(i.OutputRegister(), r0, ip);
break;
}
case kPPC_I8x16ExtractLaneS: {
- __ mfvsrwz(kScratchReg, i.InputSimd128Register(0));
+ int32_t lane = 15 - i.InputInt8(1);
+ Simd128Register src = i.InputSimd128Register(0);
+ SHIFT_TO_CORRECT_LANE(4, lane, 1, src)
+ __ mfvsrwz(kScratchReg, src);
__ sradi(i.OutputRegister(), kScratchReg, 24);
break;
}
+#undef SHIFT_TO_CORRECT_LANE
+#define GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane, \
+ lane_width_in_bytes) \
+ uint64_t mask = 0; \
+ for (int i = 0, j = 0; i <= kSimd128Size - 1; i++) { \
+ mask <<= kBitsPerByte; \
+ if (i >= lane * lane_width_in_bytes && \
+ i < lane * lane_width_in_bytes + lane_width_in_bytes) { \
+ mask |= replacement_value_byte_lane + j; \
+ j++; \
+ } else { \
+ mask |= i; \
+ } \
+ if (i == (kSimd128Size / 2) - 1) { \
+ __ mov(r0, Operand(mask)); \
+ mask = 0; \
+ } else if (i >= kSimd128Size - 1) { \
+ __ mov(ip, Operand(mask)); \
+ } \
+ } \
+ /* Need to maintain 16 byte alignment for lvx */ \
+ __ addi(sp, sp, Operand(-24)); \
+ __ StoreP(ip, MemOperand(sp, 0)); \
+ __ StoreP(r0, MemOperand(sp, 8)); \
+ __ li(r0, Operand(0)); \
+ __ lvx(kScratchDoubleReg, MemOperand(sp, r0)); \
+ __ addi(sp, sp, Operand(24));
+ case kPPC_F64x2ReplaceLane: {
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register dst = i.OutputSimd128Register();
+ int32_t lane = 1 - i.InputInt8(1);
+ constexpr int replacement_value_byte_lane = 16;
+ constexpr int lane_width_in_bytes = 8;
+ GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
+ lane_width_in_bytes)
+ __ MovDoubleToInt64(r0, i.InputDoubleRegister(2));
+ __ mtvsrd(dst, r0);
+ __ vperm(dst, src, dst, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_F32x4ReplaceLane: {
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register dst = i.OutputSimd128Register();
+ int32_t lane = 3 - i.InputInt8(1);
+ constexpr int replacement_value_byte_lane = 20;
+ constexpr int lane_width_in_bytes = 4;
+ GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
+ lane_width_in_bytes)
+ __ MovFloatToInt(kScratchReg, i.InputDoubleRegister(2));
+ __ mtvsrd(dst, kScratchReg);
+ __ vperm(dst, src, dst, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I64x2ReplaceLane: {
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register dst = i.OutputSimd128Register();
+ int32_t lane = 1 - i.InputInt8(1);
+ constexpr int replacement_value_byte_lane = 16;
+ constexpr int lane_width_in_bytes = 8;
+ GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
+ lane_width_in_bytes)
+ __ mtvsrd(dst, i.InputRegister(2));
+ __ vperm(dst, src, dst, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I32x4ReplaceLane: {
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register dst = i.OutputSimd128Register();
+ int32_t lane = 3 - i.InputInt8(1);
+ constexpr int replacement_value_byte_lane = 20;
+ constexpr int lane_width_in_bytes = 4;
+ GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
+ lane_width_in_bytes)
+ __ mtvsrd(dst, i.InputRegister(2));
+ __ vperm(dst, src, dst, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I16x8ReplaceLane: {
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register dst = i.OutputSimd128Register();
+ int32_t lane = 7 - i.InputInt8(1);
+ constexpr int replacement_value_byte_lane = 22;
+ constexpr int lane_width_in_bytes = 2;
+ GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
+ lane_width_in_bytes)
+ __ mtvsrd(dst, i.InputRegister(2));
+ __ vperm(dst, src, dst, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I8x16ReplaceLane: {
+ Simd128Register src = i.InputSimd128Register(0);
+ Simd128Register dst = i.OutputSimd128Register();
+ int32_t lane = 15 - i.InputInt8(1);
+ constexpr int replacement_value_byte_lane = 23;
+ constexpr int lane_width_in_bytes = 1;
+ GENERATE_REPLACE_LANE_MASK(lane, replacement_value_byte_lane,
+ lane_width_in_bytes)
+ __ mtvsrd(dst, i.InputRegister(2));
+ __ vperm(dst, src, dst, kScratchDoubleReg);
+ break;
+ }
+#undef GENERATE_REPLACE_LANE_MASK
+ case kPPC_F64x2Add: {
+ __ xvadddp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F64x2Sub: {
+ __ xvsubdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F64x2Mul: {
+ __ xvmuldp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F32x4Add: {
+ __ vaddfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F32x4AddHoriz: {
+ Simd128Register src0 = i.InputSimd128Register(0);
+ Simd128Register src1 = i.InputSimd128Register(1);
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+ Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
+ constexpr int shift_bits = 32;
+ // generate first operand
+ __ vpkudum(dst, src1, src0);
+ // generate second operand
+ __ li(ip, Operand(shift_bits));
+ __ mtvsrd(tempFPReg2, ip);
+ __ vspltb(tempFPReg2, tempFPReg2, Operand(7));
+ __ vsro(tempFPReg1, src0, tempFPReg2);
+ __ vsro(tempFPReg2, src1, tempFPReg2);
+ __ vpkudum(kScratchDoubleReg, tempFPReg2, tempFPReg1);
+ // add the operands
+ __ vaddfp(dst, kScratchDoubleReg, dst);
+ break;
+ }
+ case kPPC_F32x4Sub: {
+ __ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F32x4Mul: {
+ __ xvmulsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2Add: {
+ __ vaddudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2Sub: {
+ __ vsubudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2Mul: {
+ // Need to maintain 16 byte alignment for stvx and lvx.
+ __ addi(sp, sp, Operand(-40));
+ __ li(r0, Operand(0));
+ __ stvx(i.InputSimd128Register(0), MemOperand(sp, r0));
+ __ li(r0, Operand(16));
+ __ stvx(i.InputSimd128Register(1), MemOperand(sp, r0));
+ for (int i = 0; i < 2; i++) {
+ __ LoadP(r0, MemOperand(sp, kBitsPerByte * i));
+ __ LoadP(ip, MemOperand(sp, (kBitsPerByte * i) + kSimd128Size));
+ __ mulld(r0, r0, ip);
+ __ StoreP(r0, MemOperand(sp, i * kBitsPerByte));
+ }
+ __ li(r0, Operand(0));
+ __ lvx(i.OutputSimd128Register(), MemOperand(sp, r0));
+ __ addi(sp, sp, Operand(40));
+ break;
+ }
+ case kPPC_I32x4Add: {
+ __ vadduwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4AddHoriz: {
+ Simd128Register src0 = i.InputSimd128Register(0);
+ Simd128Register src1 = i.InputSimd128Register(1);
+ Simd128Register dst = i.OutputSimd128Register();
+ __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vsum2sws(dst, src0, kScratchDoubleReg);
+ __ vsum2sws(kScratchDoubleReg, src1, kScratchDoubleReg);
+ __ vpkudum(dst, kScratchDoubleReg, dst);
+ break;
+ }
+ case kPPC_I32x4Sub: {
+ __ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4Mul: {
+ __ vmuluwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8Add: {
+ __ vadduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8AddHoriz: {
+ Simd128Register src0 = i.InputSimd128Register(0);
+ Simd128Register src1 = i.InputSimd128Register(1);
+ Simd128Register dst = i.OutputSimd128Register();
+ __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vsum4shs(dst, src0, kScratchDoubleReg);
+ __ vsum4shs(kScratchDoubleReg, src1, kScratchDoubleReg);
+ __ vpkuwus(dst, kScratchDoubleReg, dst);
+ break;
+ }
+ case kPPC_I16x8Sub: {
+ __ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8Mul: {
+ __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ vmladduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1), kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I8x16Add: {
+ __ vaddubm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16Sub: {
+ __ vsububm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16Mul: {
+ __ vmuleub(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vmuloub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vpkuhum(i.OutputSimd128Register(), kScratchDoubleReg,
+ i.OutputSimd128Register());
+ break;
+ }
+ case kPPC_I64x2MinS: {
+ __ vminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4MinS: {
+ __ vminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2MinU: {
+ __ vminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4MinU: {
+ __ vminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8MinS: {
+ __ vminsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8MinU: {
+ __ vminuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16MinS: {
+ __ vminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16MinU: {
+ __ vminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2MaxS: {
+ __ vmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4MaxS: {
+ __ vmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2MaxU: {
+ __ vmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4MaxU: {
+ __ vmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8MaxS: {
+ __ vmaxsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8MaxU: {
+ __ vmaxuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16MaxS: {
+ __ vmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16MaxU: {
+ __ vmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F64x2Eq: {
+ __ xvcmpeqdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F64x2Ne: {
+ __ xvcmpeqdp(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_F64x2Le: {
+ __ xvcmpgedp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(0));
+ break;
+ }
+ case kPPC_F64x2Lt: {
+ __ xvcmpgtdp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(0));
+ break;
+ }
+ case kPPC_F32x4Eq: {
+ __ xvcmpeqsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2Eq: {
+ __ vcmpequd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4Eq: {
+ __ vcmpequw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8Eq: {
+ __ vcmpequh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16Eq: {
+ __ vcmpequb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_F32x4Ne: {
+ __ xvcmpeqsp(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I64x2Ne: {
+ __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I32x4Ne: {
+ __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I16x8Ne: {
+ __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I8x16Ne: {
+ __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+ break;
+ }
+ case kPPC_F32x4Lt: {
+ __ xvcmpgtsp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(0));
+ break;
+ }
+ case kPPC_F32x4Le: {
+ __ xvcmpgesp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+ i.InputSimd128Register(0));
+ break;
+ }
+ case kPPC_I64x2GtS: {
+ __ vcmpgtsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4GtS: {
+ __ vcmpgtsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I64x2GeS: {
+ __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I32x4GeS: {
+ __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I64x2GtU: {
+ __ vcmpgtud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I32x4GtU: {
+ __ vcmpgtuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+    break;
+ }
+ case kPPC_I64x2GeU: {
+ __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+    break;
+ }
+ case kPPC_I32x4GeU: {
+ __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I16x8GtS: {
+ __ vcmpgtsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8GeS: {
+ __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I16x8GtU: {
+ __ vcmpgtuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I16x8GeU: {
+ __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I8x16GtS: {
+ __ vcmpgtsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16GeS: {
+ __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+ case kPPC_I8x16GtU: {
+ __ vcmpgtub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ break;
+ }
+ case kPPC_I8x16GeU: {
+ __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vcmpgtub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1));
+ __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+ kScratchDoubleReg);
+ break;
+ }
+#define VECTOR_SHIFT(op) \
+ { \
+ __ mtvsrd(kScratchDoubleReg, i.InputRegister(1)); \
+ __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
+ __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \
+ kScratchDoubleReg); \
+ }
+ case kPPC_I64x2Shl: {
+ VECTOR_SHIFT(vsld)
+ break;
+ }
+ case kPPC_I64x2ShrS: {
+ VECTOR_SHIFT(vsrad)
+ break;
+ }
+ case kPPC_I64x2ShrU: {
+ VECTOR_SHIFT(vsrd)
+ break;
+ }
+ case kPPC_I32x4Shl: {
+ VECTOR_SHIFT(vslw)
+ break;
+ }
+ case kPPC_I32x4ShrS: {
+ VECTOR_SHIFT(vsraw)
+ break;
+ }
+ case kPPC_I32x4ShrU: {
+ VECTOR_SHIFT(vsrw)
+ break;
+ }
+ case kPPC_I16x8Shl: {
+ VECTOR_SHIFT(vslh)
+ break;
+ }
+ case kPPC_I16x8ShrS: {
+ VECTOR_SHIFT(vsrah)
+ break;
+ }
+ case kPPC_I16x8ShrU: {
+ VECTOR_SHIFT(vsrh)
+ break;
+ }
+ case kPPC_I8x16Shl: {
+ VECTOR_SHIFT(vslb)
+ break;
+ }
+ case kPPC_I8x16ShrS: {
+ VECTOR_SHIFT(vsrab)
+ break;
+ }
+ case kPPC_I8x16ShrU: {
+ VECTOR_SHIFT(vsrb)
+ break;
+ }
+#undef VECTOR_SHIFT
+ case kPPC_S128And: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register src = i.InputSimd128Register(1);
+ __ vand(dst, i.InputSimd128Register(0), src);
+ break;
+ }
+ case kPPC_S128Or: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register src = i.InputSimd128Register(1);
+ __ vor(dst, i.InputSimd128Register(0), src);
+ break;
+ }
+ case kPPC_S128Xor: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register src = i.InputSimd128Register(1);
+ __ vxor(dst, i.InputSimd128Register(0), src);
+ break;
+ }
+ case kPPC_S128Zero: {
+ Simd128Register dst = i.OutputSimd128Register();
+ __ vxor(dst, dst, dst);
+ break;
+ }
+ case kPPC_S128Not: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register src = i.InputSimd128Register(1);
+ __ vnor(dst, i.InputSimd128Register(0), src);
+ break;
+ }
+ case kPPC_S128Select: {
+ Simd128Register dst = i.OutputSimd128Register();
+ Simd128Register mask = i.InputSimd128Register(0);
+ Simd128Register src1 = i.InputSimd128Register(1);
+ Simd128Register src2 = i.InputSimd128Register(2);
+ __ vsel(dst, src2, src1, mask);
+ break;
+ }
case kPPC_StoreCompressTagged: {
ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
break;
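Of the new PPC cases, the *ReplaceLane family is the least obvious: GENERATE_REPLACE_LANE_MASK builds, at code-generation time, the 16-byte control vector later fed to vperm, where each position either keeps its original byte index or points at the freshly moved-in replacement value (which vperm sees at indices 16 and up). The host-side sketch below reproduces only the per-index selection the macro makes; how those values are then laid out for lvx (the two StoreP halves) and how big-endian byte order folds in is left to the macro itself.

#include <array>
#include <cstdint>

// Mirrors the selection loop in GENERATE_REPLACE_LANE_MASK: index i either
// keeps the original byte (value i) or takes byte j of the replacement value
// (value replacement_value_byte_lane + j).
std::array<uint8_t, 16> ReplaceLaneSelection(int lane, int replacement_value_byte_lane,
                                             int lane_width_in_bytes) {
  std::array<uint8_t, 16> sel{};
  for (int i = 0, j = 0; i < 16; ++i) {
    if (i >= lane * lane_width_in_bytes &&
        i < lane * lane_width_in_bytes + lane_width_in_bytes) {
      sel[i] = static_cast<uint8_t>(replacement_value_byte_lane + j++);
    } else {
      sel[i] = static_cast<uint8_t>(i);
    }
  }
  return sel;
}

// Example: kPPC_F64x2ReplaceLane uses (lane, 16, 8); kPPC_F32x4ReplaceLane with
// endian-adjusted lane 1 corresponds to ReplaceLaneSelection(1, 20, 4), i.e.
// {0,1,2,3, 20,21,22,23, 8,9, ..., 15}.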
diff --git a/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h b/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h
index 4f6aeced6da..fdffc5f0963 100644
--- a/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h
+++ b/chromium/v8/src/compiler/backend/ppc/instruction-codes-ppc.h
@@ -192,18 +192,111 @@ namespace compiler {
V(PPC_AtomicXorInt64) \
V(PPC_F64x2Splat) \
V(PPC_F64x2ExtractLane) \
+ V(PPC_F64x2ReplaceLane) \
+ V(PPC_F64x2Add) \
+ V(PPC_F64x2Sub) \
+ V(PPC_F64x2Mul) \
+ V(PPC_F64x2Eq) \
+ V(PPC_F64x2Ne) \
+ V(PPC_F64x2Le) \
+ V(PPC_F64x2Lt) \
V(PPC_F32x4Splat) \
V(PPC_F32x4ExtractLane) \
+ V(PPC_F32x4ReplaceLane) \
+ V(PPC_F32x4Add) \
+ V(PPC_F32x4AddHoriz) \
+ V(PPC_F32x4Sub) \
+ V(PPC_F32x4Mul) \
+ V(PPC_F32x4Eq) \
+ V(PPC_F32x4Ne) \
+ V(PPC_F32x4Lt) \
+ V(PPC_F32x4Le) \
V(PPC_I64x2Splat) \
V(PPC_I64x2ExtractLane) \
+ V(PPC_I64x2ReplaceLane) \
+ V(PPC_I64x2Add) \
+ V(PPC_I64x2Sub) \
+ V(PPC_I64x2Mul) \
+ V(PPC_I64x2MinS) \
+ V(PPC_I64x2MinU) \
+ V(PPC_I64x2MaxS) \
+ V(PPC_I64x2MaxU) \
+ V(PPC_I64x2Eq) \
+ V(PPC_I64x2Ne) \
+ V(PPC_I64x2GtS) \
+ V(PPC_I64x2GtU) \
+ V(PPC_I64x2GeU) \
+ V(PPC_I64x2GeS) \
+ V(PPC_I64x2Shl) \
+ V(PPC_I64x2ShrS) \
+ V(PPC_I64x2ShrU) \
V(PPC_I32x4Splat) \
V(PPC_I32x4ExtractLane) \
+ V(PPC_I32x4ReplaceLane) \
+ V(PPC_I32x4Add) \
+ V(PPC_I32x4AddHoriz) \
+ V(PPC_I32x4Sub) \
+ V(PPC_I32x4Mul) \
+ V(PPC_I32x4MinS) \
+ V(PPC_I32x4MinU) \
+ V(PPC_I32x4MaxS) \
+ V(PPC_I32x4MaxU) \
+ V(PPC_I32x4Eq) \
+ V(PPC_I32x4Ne) \
+ V(PPC_I32x4GtS) \
+ V(PPC_I32x4GeS) \
+ V(PPC_I32x4GtU) \
+ V(PPC_I32x4GeU) \
+ V(PPC_I32x4Shl) \
+ V(PPC_I32x4ShrS) \
+ V(PPC_I32x4ShrU) \
V(PPC_I16x8Splat) \
V(PPC_I16x8ExtractLaneU) \
V(PPC_I16x8ExtractLaneS) \
+ V(PPC_I16x8ReplaceLane) \
+ V(PPC_I16x8Add) \
+ V(PPC_I16x8AddHoriz) \
+ V(PPC_I16x8Sub) \
+ V(PPC_I16x8Mul) \
+ V(PPC_I16x8MinS) \
+ V(PPC_I16x8MinU) \
+ V(PPC_I16x8MaxS) \
+ V(PPC_I16x8MaxU) \
+ V(PPC_I16x8Eq) \
+ V(PPC_I16x8Ne) \
+ V(PPC_I16x8GtS) \
+ V(PPC_I16x8GeS) \
+ V(PPC_I16x8GtU) \
+ V(PPC_I16x8GeU) \
+ V(PPC_I16x8Shl) \
+ V(PPC_I16x8ShrS) \
+ V(PPC_I16x8ShrU) \
V(PPC_I8x16Splat) \
V(PPC_I8x16ExtractLaneU) \
V(PPC_I8x16ExtractLaneS) \
+ V(PPC_I8x16ReplaceLane) \
+ V(PPC_I8x16Add) \
+ V(PPC_I8x16Sub) \
+ V(PPC_I8x16Mul) \
+ V(PPC_I8x16MinS) \
+ V(PPC_I8x16MinU) \
+ V(PPC_I8x16MaxS) \
+ V(PPC_I8x16MaxU) \
+ V(PPC_I8x16Eq) \
+ V(PPC_I8x16Ne) \
+ V(PPC_I8x16GtS) \
+ V(PPC_I8x16GeS) \
+ V(PPC_I8x16GtU) \
+ V(PPC_I8x16GeU) \
+ V(PPC_I8x16Shl) \
+ V(PPC_I8x16ShrS) \
+ V(PPC_I8x16ShrU) \
+ V(PPC_S128And) \
+ V(PPC_S128Or) \
+ V(PPC_S128Xor) \
+ V(PPC_S128Zero) \
+ V(PPC_S128Not) \
+ V(PPC_S128Select) \
V(PPC_StoreCompressTagged) \
V(PPC_LoadDecompressTaggedSigned) \
V(PPC_LoadDecompressTaggedPointer) \
diff --git a/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc b/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
index 68d0aaedc4b..b1d124432ef 100644
--- a/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
+++ b/chromium/v8/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
@@ -115,18 +115,111 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_CompressAny:
case kPPC_F64x2Splat:
case kPPC_F64x2ExtractLane:
+ case kPPC_F64x2ReplaceLane:
+ case kPPC_F64x2Add:
+ case kPPC_F64x2Sub:
+ case kPPC_F64x2Mul:
+ case kPPC_F64x2Eq:
+ case kPPC_F64x2Ne:
+ case kPPC_F64x2Le:
+ case kPPC_F64x2Lt:
case kPPC_F32x4Splat:
case kPPC_F32x4ExtractLane:
+ case kPPC_F32x4ReplaceLane:
+ case kPPC_F32x4Add:
+ case kPPC_F32x4AddHoriz:
+ case kPPC_F32x4Sub:
+ case kPPC_F32x4Mul:
+ case kPPC_F32x4Eq:
+ case kPPC_F32x4Ne:
+ case kPPC_F32x4Lt:
+ case kPPC_F32x4Le:
case kPPC_I64x2Splat:
case kPPC_I64x2ExtractLane:
+ case kPPC_I64x2ReplaceLane:
+ case kPPC_I64x2Add:
+ case kPPC_I64x2Sub:
+ case kPPC_I64x2Mul:
+ case kPPC_I64x2MinS:
+ case kPPC_I64x2MinU:
+ case kPPC_I64x2MaxS:
+ case kPPC_I64x2MaxU:
+ case kPPC_I64x2Eq:
+ case kPPC_I64x2Ne:
+ case kPPC_I64x2GtS:
+ case kPPC_I64x2GtU:
+ case kPPC_I64x2GeU:
+ case kPPC_I64x2GeS:
+ case kPPC_I64x2Shl:
+ case kPPC_I64x2ShrS:
+ case kPPC_I64x2ShrU:
case kPPC_I32x4Splat:
case kPPC_I32x4ExtractLane:
+ case kPPC_I32x4ReplaceLane:
+ case kPPC_I32x4Add:
+ case kPPC_I32x4AddHoriz:
+ case kPPC_I32x4Sub:
+ case kPPC_I32x4Mul:
+ case kPPC_I32x4MinS:
+ case kPPC_I32x4MinU:
+ case kPPC_I32x4MaxS:
+ case kPPC_I32x4MaxU:
+ case kPPC_I32x4Eq:
+ case kPPC_I32x4Ne:
+ case kPPC_I32x4GtS:
+ case kPPC_I32x4GeS:
+ case kPPC_I32x4GtU:
+ case kPPC_I32x4GeU:
+ case kPPC_I32x4Shl:
+ case kPPC_I32x4ShrS:
+ case kPPC_I32x4ShrU:
case kPPC_I16x8Splat:
case kPPC_I16x8ExtractLaneU:
case kPPC_I16x8ExtractLaneS:
+ case kPPC_I16x8ReplaceLane:
+ case kPPC_I16x8Add:
+ case kPPC_I16x8AddHoriz:
+ case kPPC_I16x8Sub:
+ case kPPC_I16x8Mul:
+ case kPPC_I16x8MinS:
+ case kPPC_I16x8MinU:
+ case kPPC_I16x8MaxS:
+ case kPPC_I16x8MaxU:
+ case kPPC_I16x8Eq:
+ case kPPC_I16x8Ne:
+ case kPPC_I16x8GtS:
+ case kPPC_I16x8GeS:
+ case kPPC_I16x8GtU:
+ case kPPC_I16x8GeU:
+ case kPPC_I16x8Shl:
+ case kPPC_I16x8ShrS:
+ case kPPC_I16x8ShrU:
case kPPC_I8x16Splat:
case kPPC_I8x16ExtractLaneU:
case kPPC_I8x16ExtractLaneS:
+ case kPPC_I8x16ReplaceLane:
+ case kPPC_I8x16Add:
+ case kPPC_I8x16Sub:
+ case kPPC_I8x16Mul:
+ case kPPC_I8x16MinS:
+ case kPPC_I8x16MinU:
+ case kPPC_I8x16MaxS:
+ case kPPC_I8x16MaxU:
+ case kPPC_I8x16Eq:
+ case kPPC_I8x16Ne:
+ case kPPC_I8x16GtS:
+ case kPPC_I8x16GeS:
+ case kPPC_I8x16GtU:
+ case kPPC_I8x16GeU:
+ case kPPC_I8x16Shl:
+ case kPPC_I8x16ShrS:
+ case kPPC_I8x16ShrU:
+ case kPPC_S128And:
+ case kPPC_S128Or:
+ case kPPC_S128Xor:
+ case kPPC_S128Zero:
+ case kPPC_S128Not:
+ case kPPC_S128Select:
return kNoOpcodeFlags;
case kPPC_LoadWordS8:
diff --git a/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc b/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc
index 1598fbad041..d5ec475a808 100644
--- a/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc
+++ b/chromium/v8/src/compiler/backend/ppc/instruction-selector-ppc.cc
@@ -2127,6 +2127,86 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8) \
V(I8x16)
+#define SIMD_BINOP_LIST(V) \
+ V(F64x2Add) \
+ V(F64x2Sub) \
+ V(F64x2Mul) \
+ V(F64x2Eq) \
+ V(F64x2Ne) \
+ V(F64x2Le) \
+ V(F64x2Lt) \
+ V(F32x4Add) \
+ V(F32x4AddHoriz) \
+ V(F32x4Sub) \
+ V(F32x4Mul) \
+ V(F32x4Eq) \
+ V(F32x4Ne) \
+ V(F32x4Lt) \
+ V(F32x4Le) \
+ V(I64x2Add) \
+ V(I64x2Sub) \
+ V(I64x2Mul) \
+ V(I32x4Add) \
+ V(I32x4AddHoriz) \
+ V(I32x4Sub) \
+ V(I32x4Mul) \
+ V(I32x4MinS) \
+ V(I32x4MinU) \
+ V(I32x4MaxS) \
+ V(I32x4MaxU) \
+ V(I32x4Eq) \
+ V(I32x4Ne) \
+ V(I32x4GtS) \
+ V(I32x4GeS) \
+ V(I32x4GtU) \
+ V(I32x4GeU) \
+ V(I16x8Add) \
+ V(I16x8AddHoriz) \
+ V(I16x8Sub) \
+ V(I16x8Mul) \
+ V(I16x8MinS) \
+ V(I16x8MinU) \
+ V(I16x8MaxS) \
+ V(I16x8MaxU) \
+ V(I16x8Eq) \
+ V(I16x8Ne) \
+ V(I16x8GtS) \
+ V(I16x8GeS) \
+ V(I16x8GtU) \
+ V(I16x8GeU) \
+ V(I8x16Add) \
+ V(I8x16Sub) \
+ V(I8x16Mul) \
+ V(I8x16MinS) \
+ V(I8x16MinU) \
+ V(I8x16MaxS) \
+ V(I8x16MaxU) \
+ V(I8x16Eq) \
+ V(I8x16Ne) \
+ V(I8x16GtS) \
+ V(I8x16GeS) \
+ V(I8x16GtU) \
+ V(I8x16GeU) \
+ V(S128And) \
+ V(S128Or) \
+ V(S128Xor)
+
+#define SIMD_UNOP_LIST(V) V(S128Not)
+
+#define SIMD_SHIFT_LIST(V) \
+ V(I64x2Shl) \
+ V(I64x2ShrS) \
+ V(I64x2ShrU) \
+ V(I32x4Shl) \
+ V(I32x4ShrS) \
+ V(I32x4ShrU) \
+ V(I16x8Shl) \
+ V(I16x8ShrS) \
+ V(I16x8ShrU) \
+ V(I8x16Shl) \
+ V(I8x16ShrS) \
+ V(I8x16ShrU)
+
#define SIMD_VISIT_SPLAT(Type) \
void InstructionSelector::Visit##Type##Splat(Node* node) { \
PPCOperandGenerator g(this); \
@@ -2135,7 +2215,6 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
}
SIMD_TYPES(SIMD_VISIT_SPLAT)
#undef SIMD_VISIT_SPLAT
-#undef SIMD_TYPES
#define SIMD_VISIT_EXTRACT_LANE(Type, Sign) \
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
@@ -2153,72 +2232,74 @@ SIMD_VISIT_EXTRACT_LANE(I8x16, U)
SIMD_VISIT_EXTRACT_LANE(I8x16, S)
#undef SIMD_VISIT_EXTRACT_LANE
-void InstructionSelector::VisitI32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4Add(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4Sub(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4Shl(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4ShrS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4Mul(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4MaxS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4MinS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4Eq(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4Ne(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4MinU(Node* node) { UNIMPLEMENTED(); }
+#define SIMD_VISIT_REPLACE_LANE(Type) \
+ void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+ PPCOperandGenerator g(this); \
+ int32_t lane = OpParameter<int32_t>(node->op()); \
+ Emit(kPPC_##Type##ReplaceLane, g.DefineAsRegister(node), \
+ g.UseUniqueRegister(node->InputAt(0)), g.UseImmediate(lane), \
+ g.UseUniqueRegister(node->InputAt(1))); \
+ }
+SIMD_TYPES(SIMD_VISIT_REPLACE_LANE)
+#undef SIMD_VISIT_REPLACE_LANE
+
+#define SIMD_VISIT_BINOP(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ PPCOperandGenerator g(this); \
+ InstructionOperand temps[] = {g.TempSimd128Register(), \
+ g.TempSimd128Register()}; \
+ Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
+ g.UseUniqueRegister(node->InputAt(0)), \
+ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
+ }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+#undef SIMD_BINOP_LIST
+
+#define SIMD_VISIT_UNOP(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ PPCOperandGenerator g(this); \
+ Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
+ g.UseRegister(node->InputAt(0))); \
+ }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+#undef SIMD_UNOP_LIST
+
+#define SIMD_VISIT_SHIFT(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ PPCOperandGenerator g(this); \
+ Emit(kPPC_##Opcode, g.DefineAsRegister(node), \
+ g.UseUniqueRegister(node->InputAt(0)), \
+ g.UseUniqueRegister(node->InputAt(1))); \
+ }
+SIMD_SHIFT_LIST(SIMD_VISIT_SHIFT)
+#undef SIMD_VISIT_SHIFT
+#undef SIMD_SHIFT_LIST
+#undef SIMD_TYPES
-void InstructionSelector::VisitI32x4MaxU(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitS128Zero(Node* node) {
+ PPCOperandGenerator g(this);
+ Emit(kPPC_S128Zero, g.DefineAsRegister(node));
+}
-void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitS128Select(Node* node) {
+ PPCOperandGenerator g(this);
+ Emit(kPPC_S128Select, g.DefineAsRegister(node),
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+ g.UseRegister(node->InputAt(2)));
+}
void InstructionSelector::VisitI32x4Neg(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI32x4GtS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4GeS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4GtU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4GeU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8ReplaceLane(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8Shl(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8ShrS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8ShrU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8Add(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI16x8AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitI16x8Sub(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI16x8SubSaturateS(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitI16x8Mul(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8MinS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8MaxS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8Eq(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8Ne(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI16x8AddSaturateU(Node* node) {
UNIMPLEMENTED();
}
@@ -2227,20 +2308,8 @@ void InstructionSelector::VisitI16x8SubSaturateU(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitI16x8MinU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8MaxU(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI16x8Neg(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI16x8GtS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8GeS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8GtU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI16x8GeU(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI16x8RoundingAverageU(Node* node) {
UNIMPLEMENTED();
}
@@ -2251,32 +2320,14 @@ void InstructionSelector::VisitI8x16RoundingAverageU(Node* node) {
void InstructionSelector::VisitI8x16Neg(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI8x16ReplaceLane(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16Add(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI8x16AddSaturateS(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitI8x16Sub(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI8x16SubSaturateS(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitI8x16MinS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16MaxS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16Eq(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16Ne(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16GtS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16GeS(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI8x16AddSaturateU(Node* node) {
UNIMPLEMENTED();
}
@@ -2285,36 +2336,8 @@ void InstructionSelector::VisitI8x16SubSaturateU(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitI8x16MinU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16MaxU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16GtU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16GeU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitS128And(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitS128Or(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitS128Xor(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitS128AndNot(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS128Zero(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Eq(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Ne(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Lt(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Le(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::EmitPrepareResults(
ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
Node* node) {
@@ -2338,12 +2361,6 @@ void InstructionSelector::EmitPrepareResults(
}
}
-void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitF32x4Sqrt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Div(Node* node) { UNIMPLEMENTED(); }
@@ -2352,8 +2369,6 @@ void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
@@ -2364,10 +2379,6 @@ void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
@@ -2431,68 +2442,32 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
UNIMPLEMENTED();
}
-void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitS1x8AnyTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV32x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS1x8AllTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV32x4AllTrue(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS1x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV16x8AnyTrue(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitS1x16AllTrue(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV16x8AllTrue(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV8x16AnyTrue(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitV8x16AllTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Add(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Sub(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Mul(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitF64x2Eq(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Ne(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Lt(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF64x2Le(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
-void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI64x2Sub(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI64x2ShrU(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI64x2Mul(Node* node) { UNIMPLEMENTED(); }
-
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
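
The selector side of the PPC change replaces the per-opcode UNIMPLEMENTED stubs with X-macro lists. For readability, this is what SIMD_VISIT_BINOP(I32x4Add) expands to after preprocessing (shown expanded here purely as an illustration; it is generated, not hand-written). Both inputs are forced into distinct registers and two SIMD temporaries are reserved so the code generator is free to clobber scratch state:

void InstructionSelector::VisitI32x4Add(Node* node) {
  PPCOperandGenerator g(this);
  InstructionOperand temps[] = {g.TempSimd128Register(),
                                g.TempSimd128Register()};
  Emit(kPPC_I32x4Add, g.DefineAsRegister(node),
       g.UseUniqueRegister(node->InputAt(0)),
       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}
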
diff --git a/chromium/v8/src/compiler/backend/register-allocator.cc b/chromium/v8/src/compiler/backend/register-allocator.cc
index 8b74ef68b14..aab47722044 100644
--- a/chromium/v8/src/compiler/backend/register-allocator.cc
+++ b/chromium/v8/src/compiler/backend/register-allocator.cc
@@ -391,8 +391,8 @@ LiveRange::LiveRange(int relative_id, MachineRepresentation rep,
next_(nullptr),
current_interval_(nullptr),
last_processed_use_(nullptr),
- current_hint_position_(nullptr),
- splitting_pointer_(nullptr) {
+ splitting_pointer_(nullptr),
+ current_hint_position_(nullptr) {
DCHECK(AllocatedOperand::IsSupportedRepresentation(rep));
bits_ = AssignedRegisterField::encode(kUnassignedRegister) |
RepresentationField::encode(rep) |
@@ -473,11 +473,41 @@ RegisterKind LiveRange::kind() const {
return IsFloatingPoint(representation()) ? FP_REGISTERS : GENERAL_REGISTERS;
}
-UsePosition* LiveRange::FirstHintPosition(int* register_index) const {
- for (UsePosition* pos = first_pos_; pos != nullptr; pos = pos->next()) {
- if (pos->HintRegister(register_index)) return pos;
+UsePosition* LiveRange::FirstHintPosition(int* register_index) {
+ if (!first_pos_) return nullptr;
+ if (current_hint_position_) {
+ if (current_hint_position_->pos() < first_pos_->pos()) {
+ current_hint_position_ = first_pos_;
+ }
+ if (current_hint_position_->pos() > End()) {
+ current_hint_position_ = nullptr;
+ }
}
- return nullptr;
+ bool needs_revisit = false;
+ UsePosition* pos = current_hint_position_;
+ for (; pos != nullptr; pos = pos->next()) {
+ if (pos->HintRegister(register_index)) {
+ break;
+ }
+      // Phi and use position hints can be assigned during allocation, which
+      // would invalidate the cached hint position. Make sure we revisit them.
+ needs_revisit = needs_revisit ||
+ pos->hint_type() == UsePositionHintType::kPhi ||
+ pos->hint_type() == UsePositionHintType::kUsePos;
+ }
+ if (!needs_revisit) {
+ current_hint_position_ = pos;
+ }
+#ifdef DEBUG
+ UsePosition* pos_check = first_pos_;
+ for (; pos_check != nullptr; pos_check = pos_check->next()) {
+ if (pos_check->HasHint()) {
+ break;
+ }
+ }
+ CHECK_EQ(pos, pos_check);
+#endif
+ return pos;
}
UsePosition* LiveRange::NextUsePosition(LifetimePosition start) const {
@@ -684,6 +714,7 @@ UsePosition* LiveRange::DetachAt(LifetimePosition position, LiveRange* result,
first_pos_ = nullptr;
}
result->first_pos_ = use_after;
+ result->current_hint_position_ = current_hint_position_;
// Discard cached iteration state. It might be pointing
// to the use that no longer belongs to this live range.
@@ -693,6 +724,7 @@ UsePosition* LiveRange::DetachAt(LifetimePosition position, LiveRange* result,
if (connect_hints == ConnectHints && use_before != nullptr &&
use_after != nullptr) {
use_after->SetHint(use_before);
+ result->current_hint_position_ = use_after;
}
#ifdef DEBUG
VerifyChildStructure();
@@ -2660,6 +2692,7 @@ void LiveRangeBuilder::BuildLiveRanges() {
pos->set_type(new_type, true);
}
}
+ range->ResetCurrentHintPosition();
}
for (auto preassigned : data()->preassigned_slot_ranges()) {
TopLevelLiveRange* range = preassigned.first;
@@ -3493,7 +3526,7 @@ void LinearScanAllocator::ComputeStateFromManyPredecessors(
// Choose the live ranges from the majority.
const size_t majority =
(current_block->PredecessorCount() + 2 - deferred_blocks) / 2;
- bool taken_registers[RegisterConfiguration::kMaxRegisters] = {0};
+ bool taken_registers[RegisterConfiguration::kMaxRegisters] = {false};
auto assign_to_live = [this, counts, majority](
std::function<bool(TopLevelLiveRange*)> filter,
RangeWithRegisterSet* to_be_live,
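
FirstHintPosition now resumes its scan from the cached current_hint_position_ and only commits the advanced cursor when none of the skipped use positions could still receive a phi or use-position hint later; the DEBUG block cross-checks the cached answer against a full scan. A standalone sketch of that caching rule (an assumed simplification using plain vectors, not the V8 types):

#include <cstddef>
#include <vector>

struct Use {
  bool has_hint;
  bool may_gain_hint_later;  // models kPhi / kUsePos hint types
};

const Use* FirstHint(const std::vector<Use>& uses, std::size_t* cache) {
  bool needs_revisit = false;
  std::size_t i = *cache;
  for (; i < uses.size(); ++i) {
    if (uses[i].has_hint) break;
    // Skipped elements that might acquire a hint later forbid caching.
    needs_revisit = needs_revisit || uses[i].may_gain_hint_later;
  }
  if (!needs_revisit) *cache = i;  // safe to skip this prefix next time
  return i < uses.size() ? &uses[i] : nullptr;
}
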
diff --git a/chromium/v8/src/compiler/backend/register-allocator.h b/chromium/v8/src/compiler/backend/register-allocator.h
index f890bd868b7..85a9cf12170 100644
--- a/chromium/v8/src/compiler/backend/register-allocator.h
+++ b/chromium/v8/src/compiler/backend/register-allocator.h
@@ -618,14 +618,14 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
LiveRange* SplitAt(LifetimePosition position, Zone* zone);
// Returns nullptr when no register is hinted, otherwise sets register_index.
- UsePosition* FirstHintPosition(int* register_index) const;
- UsePosition* FirstHintPosition() const {
+ // Uses {current_hint_position_} as a cache, and tries to update it.
+ UsePosition* FirstHintPosition(int* register_index);
+ UsePosition* FirstHintPosition() {
int register_index;
return FirstHintPosition(&register_index);
}
UsePosition* current_hint_position() const {
- DCHECK(current_hint_position_ == FirstHintPosition());
return current_hint_position_;
}
@@ -656,6 +656,7 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
const InstructionOperand& spill_op);
void SetUseHints(int register_index);
void UnsetUseHints() { SetUseHints(kUnassignedRegister); }
+ void ResetCurrentHintPosition() { current_hint_position_ = first_pos_; }
void Print(const RegisterConfiguration* config, bool with_children) const;
void Print(bool with_children) const;
@@ -701,10 +702,10 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
mutable UseInterval* current_interval_;
// This is used as a cache, it doesn't affect correctness.
mutable UsePosition* last_processed_use_;
- // This is used as a cache, it's invalid outside of BuildLiveRanges.
- mutable UsePosition* current_hint_position_;
// Cache the last position splintering stopped at.
mutable UsePosition* splitting_pointer_;
+ // This is used as a cache in BuildLiveRanges and during register allocation.
+ UsePosition* current_hint_position_;
LiveRangeBundle* bundle_ = nullptr;
// Next interval start, relative to the current linear scan position.
LifetimePosition next_start_;
diff --git a/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc b/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc
index cb79373b425..bef8e7c15aa 100644
--- a/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc
+++ b/chromium/v8/src/compiler/backend/s390/code-generator-s390.cc
@@ -3853,10 +3853,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
// vector boolean unops
- case kS390_S1x2AnyTrue:
- case kS390_S1x4AnyTrue:
- case kS390_S1x8AnyTrue:
- case kS390_S1x16AnyTrue: {
+ case kS390_V64x2AnyTrue:
+ case kS390_V32x4AnyTrue:
+ case kS390_V16x8AnyTrue:
+ case kS390_V8x16AnyTrue: {
Simd128Register src = i.InputSimd128Register(0);
Register dst = i.OutputRegister();
Register temp = i.TempRegister(0);
@@ -3879,19 +3879,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vtm(kScratchDoubleReg, kScratchDoubleReg, Condition(0), Condition(0), \
Condition(0)); \
__ locgr(Condition(8), dst, temp);
- case kS390_S1x2AllTrue: {
+ case kS390_V64x2AllTrue: {
SIMD_ALL_TRUE(3)
break;
}
- case kS390_S1x4AllTrue: {
+ case kS390_V32x4AllTrue: {
SIMD_ALL_TRUE(2)
break;
}
- case kS390_S1x8AllTrue: {
+ case kS390_V16x8AllTrue: {
SIMD_ALL_TRUE(1)
break;
}
- case kS390_S1x16AllTrue: {
+ case kS390_V8x16AllTrue: {
SIMD_ALL_TRUE(0)
break;
}
@@ -4154,10 +4154,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
for (int i = 0, j = 0; i < 2; i++, j = +2) {
#ifdef V8_TARGET_BIG_ENDIAN
__ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1]));
- __ aih(i < 1 ? ip : r0, Operand(k8x16_indices[j]));
+ __ iihf(i < 1 ? ip : r0, Operand(k8x16_indices[j]));
#else
__ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j]));
- __ aih(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1]));
+ __ iihf(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1]));
#endif
}
__ vlvgp(kScratchDoubleReg, ip, r0);
@@ -4185,6 +4185,119 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
#endif
break;
}
+ case kS390_I32x4BitMask: {
+#ifdef V8_TARGET_BIG_ENDIAN
+ __ lgfi(kScratchReg, Operand(0x204060));
+ __ iihf(kScratchReg, Operand(0x80808080)); // Zeroing the high bits.
+#else
+ __ lgfi(kScratchReg, Operand(0x80808080));
+ __ iihf(kScratchReg, Operand(0x60402000));
+#endif
+ __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
+ __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
+ Condition(0), Condition(0), Condition(0));
+ __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
+ Condition(0));
+ break;
+ }
+ case kS390_I16x8BitMask: {
+#ifdef V8_TARGET_BIG_ENDIAN
+ __ lgfi(kScratchReg, Operand(0x40506070));
+ __ iihf(kScratchReg, Operand(0x102030));
+#else
+ __ lgfi(kScratchReg, Operand(0x30201000));
+ __ iihf(kScratchReg, Operand(0x70605040));
+#endif
+ __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
+ __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
+ Condition(0), Condition(0), Condition(0));
+ __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
+ Condition(0));
+ break;
+ }
+ case kS390_I8x16BitMask: {
+#ifdef V8_TARGET_BIG_ENDIAN
+ __ lgfi(r0, Operand(0x60687078));
+ __ iihf(r0, Operand(0x40485058));
+ __ lgfi(ip, Operand(0x20283038));
+ __ iihf(ip, Operand(0x81018));
+#else
+ __ lgfi(ip, Operand(0x58504840));
+ __ iihf(ip, Operand(0x78706860));
+ __ lgfi(r0, Operand(0x18100800));
+ __ iihf(r0, Operand(0x38302820));
+#endif
+ __ vlvgp(kScratchDoubleReg, ip, r0);
+ __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
+ Condition(0), Condition(0), Condition(0));
+ __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 3),
+ Condition(1));
+ break;
+ }
+ case kS390_F32x4Pmin: {
+ __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1), Condition(3), Condition(0),
+ Condition(2));
+ break;
+ }
+ case kS390_F32x4Pmax: {
+ __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1), Condition(3), Condition(0),
+ Condition(2));
+ break;
+ }
+ case kS390_F64x2Pmin: {
+ __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1), Condition(3), Condition(0),
+ Condition(3));
+ break;
+ }
+ case kS390_F64x2Pmax: {
+ __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1), Condition(3), Condition(0),
+ Condition(3));
+ break;
+ }
+ case kS390_F64x2Ceil: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(6),
+ Condition(0), Condition(3));
+ break;
+ }
+ case kS390_F64x2Floor: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(7),
+ Condition(0), Condition(3));
+ break;
+ }
+ case kS390_F64x2Trunc: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(5),
+ Condition(0), Condition(3));
+ break;
+ }
+ case kS390_F64x2NearestInt: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(4),
+ Condition(0), Condition(3));
+ break;
+ }
+ case kS390_F32x4Ceil: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(6),
+ Condition(0), Condition(2));
+ break;
+ }
+ case kS390_F32x4Floor: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(7),
+ Condition(0), Condition(2));
+ break;
+ }
+ case kS390_F32x4Trunc: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(5),
+ Condition(0), Condition(2));
+ break;
+ }
+ case kS390_F32x4NearestInt: {
+ __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(4),
+ Condition(0), Condition(2));
+ break;
+ }
case kS390_StoreCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;
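
The new BitMask cases build a vbperm permute constant whose significant bytes are the bit indices of each lane's sign bit (0, 32, 64 and 96 for I32x4 in big-endian bit numbering), while 0x80 bytes select a constant zero; vlgv then pulls the gathered bits into a general register. A scalar model of the value being computed (illustrative only; big-endian lane-ordering details are glossed over):

#include <cstdint>

uint32_t I32x4BitMask(const int32_t lanes[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    // One bit per lane, taken from that lane's sign bit.
    mask |= (static_cast<uint32_t>(lanes[i]) >> 31) << i;
  }
  return mask;
}
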
diff --git a/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h b/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h
index 6101b22166c..f588e854265 100644
--- a/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h
+++ b/chromium/v8/src/compiler/backend/s390/instruction-codes-s390.h
@@ -215,6 +215,12 @@ namespace compiler {
V(S390_F64x2ExtractLane) \
V(S390_F64x2Qfma) \
V(S390_F64x2Qfms) \
+ V(S390_F64x2Pmin) \
+ V(S390_F64x2Pmax) \
+ V(S390_F64x2Ceil) \
+ V(S390_F64x2Floor) \
+ V(S390_F64x2Trunc) \
+ V(S390_F64x2NearestInt) \
V(S390_F32x4Splat) \
V(S390_F32x4ExtractLane) \
V(S390_F32x4ReplaceLane) \
@@ -238,6 +244,12 @@ namespace compiler {
V(S390_F32x4Max) \
V(S390_F32x4Qfma) \
V(S390_F32x4Qfms) \
+ V(S390_F32x4Pmin) \
+ V(S390_F32x4Pmax) \
+ V(S390_F32x4Ceil) \
+ V(S390_F32x4Floor) \
+ V(S390_F32x4Trunc) \
+ V(S390_F32x4NearestInt) \
V(S390_I64x2Neg) \
V(S390_I64x2Add) \
V(S390_I64x2Sub) \
@@ -286,6 +298,7 @@ namespace compiler {
V(S390_I32x4UConvertI16x8Low) \
V(S390_I32x4UConvertI16x8High) \
V(S390_I32x4Abs) \
+ V(S390_I32x4BitMask) \
V(S390_I16x8Splat) \
V(S390_I16x8ExtractLaneU) \
V(S390_I16x8ExtractLaneS) \
@@ -320,6 +333,7 @@ namespace compiler {
V(S390_I16x8SubSaturateU) \
V(S390_I16x8RoundingAverageU) \
V(S390_I16x8Abs) \
+ V(S390_I16x8BitMask) \
V(S390_I8x16Splat) \
V(S390_I8x16ExtractLaneU) \
V(S390_I8x16ExtractLaneS) \
@@ -349,16 +363,17 @@ namespace compiler {
V(S390_I8x16SubSaturateU) \
V(S390_I8x16RoundingAverageU) \
V(S390_I8x16Abs) \
+ V(S390_I8x16BitMask) \
V(S390_S8x16Shuffle) \
V(S390_S8x16Swizzle) \
- V(S390_S1x2AnyTrue) \
- V(S390_S1x4AnyTrue) \
- V(S390_S1x8AnyTrue) \
- V(S390_S1x16AnyTrue) \
- V(S390_S1x2AllTrue) \
- V(S390_S1x4AllTrue) \
- V(S390_S1x8AllTrue) \
- V(S390_S1x16AllTrue) \
+ V(S390_V64x2AnyTrue) \
+ V(S390_V32x4AnyTrue) \
+ V(S390_V16x8AnyTrue) \
+ V(S390_V8x16AnyTrue) \
+ V(S390_V64x2AllTrue) \
+ V(S390_V32x4AllTrue) \
+ V(S390_V16x8AllTrue) \
+ V(S390_V8x16AllTrue) \
V(S390_S128And) \
V(S390_S128Or) \
V(S390_S128Xor) \
diff --git a/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc b/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc
index 502ce229f50..775590a863d 100644
--- a/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc
+++ b/chromium/v8/src/compiler/backend/s390/instruction-scheduler-s390.cc
@@ -161,6 +161,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_F64x2ExtractLane:
case kS390_F64x2Qfma:
case kS390_F64x2Qfms:
+ case kS390_F64x2Pmin:
+ case kS390_F64x2Pmax:
+ case kS390_F64x2Ceil:
+ case kS390_F64x2Floor:
+ case kS390_F64x2Trunc:
+ case kS390_F64x2NearestInt:
case kS390_F32x4Splat:
case kS390_F32x4ExtractLane:
case kS390_F32x4ReplaceLane:
@@ -184,6 +190,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_F32x4Max:
case kS390_F32x4Qfma:
case kS390_F32x4Qfms:
+ case kS390_F32x4Pmin:
+ case kS390_F32x4Pmax:
+ case kS390_F32x4Ceil:
+ case kS390_F32x4Floor:
+ case kS390_F32x4Trunc:
+ case kS390_F32x4NearestInt:
case kS390_I64x2Neg:
case kS390_I64x2Add:
case kS390_I64x2Sub:
@@ -232,6 +244,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I32x4UConvertI16x8Low:
case kS390_I32x4UConvertI16x8High:
case kS390_I32x4Abs:
+ case kS390_I32x4BitMask:
case kS390_I16x8Splat:
case kS390_I16x8ExtractLaneU:
case kS390_I16x8ExtractLaneS:
@@ -266,6 +279,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I16x8SubSaturateU:
case kS390_I16x8RoundingAverageU:
case kS390_I16x8Abs:
+ case kS390_I16x8BitMask:
case kS390_I8x16Splat:
case kS390_I8x16ExtractLaneU:
case kS390_I8x16ExtractLaneS:
@@ -295,16 +309,17 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I8x16SubSaturateU:
case kS390_I8x16RoundingAverageU:
case kS390_I8x16Abs:
+ case kS390_I8x16BitMask:
case kS390_S8x16Shuffle:
case kS390_S8x16Swizzle:
- case kS390_S1x2AnyTrue:
- case kS390_S1x4AnyTrue:
- case kS390_S1x8AnyTrue:
- case kS390_S1x16AnyTrue:
- case kS390_S1x2AllTrue:
- case kS390_S1x4AllTrue:
- case kS390_S1x8AllTrue:
- case kS390_S1x16AllTrue:
+ case kS390_V64x2AnyTrue:
+ case kS390_V32x4AnyTrue:
+ case kS390_V16x8AnyTrue:
+ case kS390_V8x16AnyTrue:
+ case kS390_V64x2AllTrue:
+ case kS390_V32x4AllTrue:
+ case kS390_V16x8AllTrue:
+ case kS390_V8x16AllTrue:
case kS390_S128And:
case kS390_S128Or:
case kS390_S128Xor:
diff --git a/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc b/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc
index 515e8dd127b..39089f346ed 100644
--- a/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc
+++ b/chromium/v8/src/compiler/backend/s390/instruction-selector-s390.cc
@@ -2635,11 +2635,19 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(F64x2Abs) \
V(F64x2Neg) \
V(F64x2Sqrt) \
+ V(F64x2Ceil) \
+ V(F64x2Floor) \
+ V(F64x2Trunc) \
+ V(F64x2NearestInt) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4Sqrt) \
+ V(F32x4Ceil) \
+ V(F32x4Floor) \
+ V(F32x4Trunc) \
+ V(F32x4NearestInt) \
V(I64x2Neg) \
V(I16x8Abs) \
V(I32x4Neg) \
@@ -2672,14 +2680,14 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(I8x16ShrU)
#define SIMD_BOOL_LIST(V) \
- V(S1x2AnyTrue) \
- V(S1x4AnyTrue) \
- V(S1x8AnyTrue) \
- V(S1x16AnyTrue) \
- V(S1x2AllTrue) \
- V(S1x4AllTrue) \
- V(S1x8AllTrue) \
- V(S1x16AllTrue)
+ V(V64x2AnyTrue) \
+ V(V32x4AnyTrue) \
+ V(V16x8AnyTrue) \
+ V(V8x16AnyTrue) \
+ V(V64x2AllTrue) \
+ V(V32x4AllTrue) \
+ V(V16x8AllTrue) \
+ V(V8x16AllTrue)
#define SIMD_CONVERSION_LIST(V) \
V(I32x4SConvertF32x4) \
@@ -2794,6 +2802,29 @@ SIMD_VISIT_QFMOP(F64x2Qfms)
SIMD_VISIT_QFMOP(F32x4Qfma)
SIMD_VISIT_QFMOP(F32x4Qfms)
#undef SIMD_VISIT_QFMOP
+
+#define SIMD_VISIT_BITMASK(Opcode) \
+ void InstructionSelector::Visit##Opcode(Node* node) { \
+ S390OperandGenerator g(this); \
+ Emit(kS390_##Opcode, g.DefineAsRegister(node), \
+ g.UseUniqueRegister(node->InputAt(0))); \
+ }
+SIMD_VISIT_BITMASK(I8x16BitMask)
+SIMD_VISIT_BITMASK(I16x8BitMask)
+SIMD_VISIT_BITMASK(I32x4BitMask)
+#undef SIMD_VISIT_BITMASK
+
+#define SIMD_VISIT_PMIN_MAX(Type) \
+ void InstructionSelector::Visit##Type(Node* node) { \
+ S390OperandGenerator g(this); \
+ Emit(kS390_##Type, g.DefineAsRegister(node), \
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \
+ }
+SIMD_VISIT_PMIN_MAX(F64x2Pmin)
+SIMD_VISIT_PMIN_MAX(F32x4Pmin)
+SIMD_VISIT_PMIN_MAX(F64x2Pmax)
+SIMD_VISIT_PMIN_MAX(F32x4Pmax)
+#undef SIMD_VISIT_PMIN_MAX
#undef SIMD_TYPES
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
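
The Pmin/Pmax visitors are plain two-input register ops; what matters is the semantics they stand for. As a reference, a sketch of the wasm pseudo-minimum/maximum definition (not of the vfmin/vfmax encodings used in the code generator): the first operand is returned unless the second compares strictly smaller (pmin) or strictly larger (pmax), so NaNs and signed zeros are resolved by operand order rather than canonicalized.

float F32Pmin(float a, float b) { return b < a ? b : a; }
float F32Pmax(float a, float b) { return a < b ? b : a; }
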
diff --git a/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc b/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc
index 4f99ad49ba8..110a478c543 100644
--- a/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc
+++ b/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc
@@ -194,6 +194,94 @@ class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
XMMRegister const result_;
};
+class OutOfLineF32x4Min final : public OutOfLineCode {
+ public:
+ OutOfLineF32x4Min(CodeGenerator* gen, XMMRegister result, XMMRegister error)
+ : OutOfLineCode(gen), result_(result), error_(error) {}
+
+ void Generate() final {
+ // |result| is the partial result, |kScratchDoubleReg| is the error.
+ // propagate -0's and NaNs (possibly non-canonical) from the error.
+ __ Orps(error_, result_);
+ // Canonicalize NaNs by quieting and clearing the payload.
+ __ Cmpps(result_, error_, int8_t{3});
+ __ Orps(error_, result_);
+ __ Psrld(result_, byte{10});
+ __ Andnps(result_, error_);
+ }
+
+ private:
+ XMMRegister const result_;
+ XMMRegister const error_;
+};
+
+class OutOfLineF64x2Min final : public OutOfLineCode {
+ public:
+ OutOfLineF64x2Min(CodeGenerator* gen, XMMRegister result, XMMRegister error)
+ : OutOfLineCode(gen), result_(result), error_(error) {}
+
+ void Generate() final {
+ // |result| is the partial result, |kScratchDoubleReg| is the error.
+ // propagate -0's and NaNs (possibly non-canonical) from the error.
+ __ Orpd(error_, result_);
+ // Canonicalize NaNs by quieting and clearing the payload.
+ __ Cmppd(result_, error_, int8_t{3});
+ __ Orpd(error_, result_);
+ __ Psrlq(result_, 13);
+ __ Andnpd(result_, error_);
+ }
+
+ private:
+ XMMRegister const result_;
+ XMMRegister const error_;
+};
+
+class OutOfLineF32x4Max final : public OutOfLineCode {
+ public:
+ OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error)
+ : OutOfLineCode(gen), result_(result), error_(error) {}
+
+ void Generate() final {
+ // |result| is the partial result, |kScratchDoubleReg| is the error.
+ // Propagate NaNs (possibly non-canonical).
+ __ Orps(result_, error_);
+ // Propagate sign errors and (subtle) quiet NaNs.
+ __ Subps(result_, error_);
+ // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+ __ Cmpps(error_, result_, int8_t{3});
+ __ Psrld(error_, byte{10});
+ __ Andnps(error_, result_);
+ __ Movaps(result_, error_);
+ }
+
+ private:
+ XMMRegister const result_;
+ XMMRegister const error_;
+};
+
+class OutOfLineF64x2Max final : public OutOfLineCode {
+ public:
+ OutOfLineF64x2Max(CodeGenerator* gen, XMMRegister result, XMMRegister error)
+ : OutOfLineCode(gen), result_(result), error_(error) {}
+
+ void Generate() final {
+ // |result| is the partial result, |kScratchDoubleReg| is the error.
+ // Propagate NaNs (possibly non-canonical).
+ __ Orpd(result_, error_);
+ // Propagate sign errors and (subtle) quiet NaNs.
+ __ Subpd(result_, error_);
+ // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+ __ Cmppd(error_, result_, int8_t{3});
+ __ Psrlq(error_, byte{13});
+ __ Andnpd(error_, result_);
+ __ Movapd(result_, error_);
+ }
+
+ private:
+ XMMRegister const result_;
+ XMMRegister const error_;
+};
+
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
public:
OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
@@ -2328,18 +2416,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
- // The minpd instruction doesn't propagate NaNs and +0's in its first
- // operand. Perform minpd in both orders, merge the resuls, and adjust.
+ // The minpd instruction doesn't propagate NaNs and -0's in its first
+ // operand. Perform minpd in both orders and compare results. Handle the
+ // unlikely case of discrepancies out of line.
__ Movapd(kScratchDoubleReg, src1);
__ Minpd(kScratchDoubleReg, dst);
__ Minpd(dst, src1);
- // propagate -0's and NaNs, which may be non-canonical.
- __ Orpd(kScratchDoubleReg, dst);
- // Canonicalize NaNs by quieting and clearing the payload.
- __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
- __ Orpd(kScratchDoubleReg, dst);
- __ Psrlq(dst, 13);
- __ Andnpd(dst, kScratchDoubleReg);
+ // Most likely there is no difference and we're done.
+ __ Xorpd(kScratchDoubleReg, dst);
+ __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
+ auto ool = new (zone()) OutOfLineF64x2Min(this, dst, kScratchDoubleReg);
+ __ j(not_zero, ool->entry());
+ __ bind(ool->exit());
break;
}
case kX64F64x2Max: {
@@ -2347,20 +2435,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxpd instruction doesn't propagate NaNs and +0's in its first
- // operand. Perform maxpd in both orders, merge the resuls, and adjust.
+ // operand. Perform maxpd in both orders and compare results. Handle the
+ // unlikely case of discrepancies out of line.
__ Movapd(kScratchDoubleReg, src1);
__ Maxpd(kScratchDoubleReg, dst);
__ Maxpd(dst, src1);
- // Find discrepancies.
- __ Xorpd(dst, kScratchDoubleReg);
- // Propagate NaNs, which may be non-canonical.
- __ Orpd(kScratchDoubleReg, dst);
- // Propagate sign discrepancy and (subtle) quiet NaNs.
- __ Subpd(kScratchDoubleReg, dst);
- // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
- __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
- __ Psrlq(dst, 13);
- __ Andnpd(dst, kScratchDoubleReg);
+ // Most likely there is no difference and we're done.
+ __ Xorpd(kScratchDoubleReg, dst);
+ __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
+ auto ool = new (zone()) OutOfLineF64x2Max(this, dst, kScratchDoubleReg);
+ __ j(not_zero, ool->entry());
+ __ bind(ool->exit());
break;
}
case kX64F64x2Eq: {
@@ -2524,18 +2609,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
- // The minps instruction doesn't propagate NaNs and +0's in its first
- // operand. Perform minps in both orders, merge the resuls, and adjust.
+ // The minps instruction doesn't propagate NaNs and -0's in its first
+ // operand. Perform minps in both orders and compare results. Handle the
+ // unlikely case of discrepancies out of line.
__ Movaps(kScratchDoubleReg, src1);
__ Minps(kScratchDoubleReg, dst);
__ Minps(dst, src1);
- // propagate -0's and NaNs, which may be non-canonical.
- __ Orps(kScratchDoubleReg, dst);
- // Canonicalize NaNs by quieting and clearing the payload.
- __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
- __ Orps(kScratchDoubleReg, dst);
- __ Psrld(dst, byte{10});
- __ Andnps(dst, kScratchDoubleReg);
+ // Most likely there is no difference and we're done.
+ __ Xorps(kScratchDoubleReg, dst);
+ __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
+ auto ool = new (zone()) OutOfLineF32x4Min(this, dst, kScratchDoubleReg);
+ __ j(not_zero, ool->entry());
+ __ bind(ool->exit());
break;
}
case kX64F32x4Max: {
@@ -2543,20 +2628,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxps instruction doesn't propagate NaNs and +0's in its first
- // operand. Perform maxps in both orders, merge the resuls, and adjust.
+ // operand. Perform maxps in both orders and compare results. Handle the
+ // unlikely case of discrepancies out of line.
__ Movaps(kScratchDoubleReg, src1);
__ Maxps(kScratchDoubleReg, dst);
__ Maxps(dst, src1);
- // Find discrepancies.
- __ Xorps(dst, kScratchDoubleReg);
- // Propagate NaNs, which may be non-canonical.
- __ Orps(kScratchDoubleReg, dst);
- // Propagate sign discrepancy and (subtle) quiet NaNs.
- __ Subps(kScratchDoubleReg, dst);
- // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
- __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
- __ Psrld(dst, byte{10});
- __ Andnps(dst, kScratchDoubleReg);
+ // Most likely there is no difference and we're done.
+ __ Xorps(kScratchDoubleReg, dst);
+ __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
+ auto ool = new (zone()) OutOfLineF32x4Max(this, dst, kScratchDoubleReg);
+ __ j(not_zero, ool->entry());
+ __ bind(ool->exit());
break;
}
case kX64F32x4Eq: {
@@ -2619,6 +2701,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Maxps(dst, i.InputSimd128Register(1));
break;
}
+ case kX64F32x4Round: {
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
+ break;
+ }
+ case kX64F64x2Round: {
+ RoundingMode const mode =
+ static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+ __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
+ break;
+ }
case kX64F64x2Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
@@ -3093,6 +3187,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
break;
}
+ case kX64I32x4DotI16x8S: {
+ __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+ break;
+ }
case kX64S128Zero: {
XMMRegister dst = i.OutputSimd128Register();
__ Xorps(dst, dst);
@@ -3926,10 +4024,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Por(dst, kScratchDoubleReg);
break;
}
- case kX64S1x2AnyTrue:
- case kX64S1x4AnyTrue:
- case kX64S1x8AnyTrue:
- case kX64S1x16AnyTrue: {
+ case kX64V64x2AnyTrue:
+ case kX64V32x4AnyTrue:
+ case kX64V16x8AnyTrue:
+ case kX64V8x16AnyTrue: {
Register dst = i.OutputRegister();
XMMRegister src = i.InputSimd128Register(0);
@@ -3942,19 +4040,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
// 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
// respectively.
- case kX64S1x2AllTrue: {
+ case kX64V64x2AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
break;
}
- case kX64S1x4AllTrue: {
+ case kX64V32x4AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
break;
}
- case kX64S1x8AllTrue: {
+ case kX64V16x8AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
break;
}
- case kX64S1x16AllTrue: {
+ case kX64V8x16AllTrue: {
ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
break;
}
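
The rewritten F64x2Min/Max (and their F32x4 counterparts) run minpd/maxpd in both operand orders, use ptest to ask whether the two results differ, and only branch to the new out-of-line code to merge and canonicalize when they do, so the common case stays short. A scalar model of the per-lane result both paths must agree on for wasm min (a sketch assuming IEEE doubles, not the SSE sequence itself):

#include <cmath>
#include <limits>

double WasmF64Min(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();  // canonical NaN
  }
  if (a == 0.0 && b == 0.0) {
    return std::signbit(a) ? a : b;  // -0.0 is smaller than +0.0
  }
  return a < b ? a : b;  // what bare minpd computes on ordinary inputs
}
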
diff --git a/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h b/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h
index 745f5c6cb25..ed7d2060f59 100644
--- a/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h
+++ b/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h
@@ -174,6 +174,7 @@ namespace compiler {
V(X64F64x2Qfms) \
V(X64F64x2Pmin) \
V(X64F64x2Pmax) \
+ V(X64F64x2Round) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
@@ -199,6 +200,7 @@ namespace compiler {
V(X64F32x4Qfms) \
V(X64F32x4Pmin) \
V(X64F32x4Pmax) \
+ V(X64F32x4Round) \
V(X64I64x2Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
@@ -248,6 +250,7 @@ namespace compiler {
V(X64I32x4GeU) \
V(X64I32x4Abs) \
V(X64I32x4BitMask) \
+ V(X64I32x4DotI16x8S) \
V(X64I16x8Splat) \
V(X64I16x8ExtractLaneU) \
V(X64I16x8ExtractLaneS) \
@@ -357,14 +360,14 @@ namespace compiler {
V(X64S8x8Reverse) \
V(X64S8x4Reverse) \
V(X64S8x2Reverse) \
- V(X64S1x2AnyTrue) \
- V(X64S1x2AllTrue) \
- V(X64S1x4AnyTrue) \
- V(X64S1x4AllTrue) \
- V(X64S1x8AnyTrue) \
- V(X64S1x8AllTrue) \
- V(X64S1x16AnyTrue) \
- V(X64S1x16AllTrue) \
+ V(X64V64x2AnyTrue) \
+ V(X64V64x2AllTrue) \
+ V(X64V32x4AnyTrue) \
+ V(X64V32x4AllTrue) \
+ V(X64V16x8AnyTrue) \
+ V(X64V16x8AllTrue) \
+ V(X64V8x16AnyTrue) \
+ V(X64V8x16AllTrue) \
V(X64Word64AtomicLoadUint8) \
V(X64Word64AtomicLoadUint16) \
V(X64Word64AtomicLoadUint32) \
diff --git a/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
index d2c1d14855c..395c4a4e9c7 100644
--- a/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
+++ b/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc
@@ -146,6 +146,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Qfms:
case kX64F64x2Pmin:
case kX64F64x2Pmax:
+ case kX64F64x2Round:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
@@ -171,6 +172,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Qfms:
case kX64F32x4Pmin:
case kX64F32x4Pmax:
+ case kX64F32x4Round:
case kX64I64x2Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
@@ -220,6 +222,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4GeU:
case kX64I32x4Abs:
case kX64I32x4BitMask:
+ case kX64I32x4DotI16x8S:
case kX64I16x8Splat:
case kX64I16x8ExtractLaneU:
case kX64I16x8ExtractLaneS:
@@ -292,12 +295,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64S128Select:
case kX64S128Zero:
case kX64S128AndNot:
- case kX64S1x2AnyTrue:
- case kX64S1x2AllTrue:
- case kX64S1x4AnyTrue:
- case kX64S1x4AllTrue:
- case kX64S1x8AnyTrue:
- case kX64S1x8AllTrue:
+ case kX64V64x2AnyTrue:
+ case kX64V64x2AllTrue:
+ case kX64V32x4AnyTrue:
+ case kX64V32x4AllTrue:
+ case kX64V16x8AnyTrue:
+ case kX64V16x8AllTrue:
case kX64S8x16Swizzle:
case kX64S8x16Shuffle:
case kX64S32x4Swizzle:
@@ -325,8 +328,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64S8x8Reverse:
case kX64S8x4Reverse:
case kX64S8x2Reverse:
- case kX64S1x16AnyTrue:
- case kX64S1x16AllTrue:
+ case kX64V8x16AnyTrue:
+ case kX64V8x16AllTrue:
return (instr->addressing_mode() == kMode_None)
? kNoOpcodeFlags
: kIsLoadOperation | kHasSideEffect;
diff --git a/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc
index dd3f556937d..ab669864954 100644
--- a/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc
+++ b/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -1461,7 +1461,16 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
- V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest))
+ V(Float64RoundTiesEven, \
+ kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \
+ V(F32x4Ceil, kX64F32x4Round | MiscField::encode(kRoundUp)) \
+ V(F32x4Floor, kX64F32x4Round | MiscField::encode(kRoundDown)) \
+ V(F32x4Trunc, kX64F32x4Round | MiscField::encode(kRoundToZero)) \
+ V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest)) \
+ V(F64x2Ceil, kX64F64x2Round | MiscField::encode(kRoundUp)) \
+ V(F64x2Floor, kX64F64x2Round | MiscField::encode(kRoundDown)) \
+ V(F64x2Trunc, kX64F64x2Round | MiscField::encode(kRoundToZero)) \
+ V(F64x2NearestInt, kX64F64x2Round | MiscField::encode(kRoundToNearest))
#define RO_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
@@ -1898,16 +1907,33 @@ void VisitWord32EqualImpl(InstructionSelector* selector, Node* node,
X64OperandGenerator g(selector);
const RootsTable& roots_table = selector->isolate()->roots_table();
RootIndex root_index;
- CompressedHeapObjectBinopMatcher m(node);
- if (m.right().HasValue() &&
- roots_table.IsRootHandle(m.right().Value(), &root_index)) {
+ Node* left = nullptr;
+ Handle<HeapObject> right;
+ // HeapConstants and CompressedHeapConstants can be treated the same when
+ // using them as an input to a 32-bit comparison. Check whether either is
+ // present.
+ {
+ CompressedHeapObjectBinopMatcher m(node);
+ if (m.right().HasValue()) {
+ left = m.left().node();
+ right = m.right().Value();
+ } else {
+ HeapObjectBinopMatcher m2(node);
+ if (m2.right().HasValue()) {
+ left = m2.left().node();
+ right = m2.right().Value();
+ }
+ }
+ }
+ if (!right.is_null() && roots_table.IsRootHandle(right, &root_index)) {
+ DCHECK_NE(left, nullptr);
InstructionCode opcode =
kX64Cmp32 | AddressingModeField::encode(kMode_Root);
return VisitCompare(
selector, opcode,
g.TempImmediate(
TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
- g.UseRegister(m.left().node()), cont);
+ g.UseRegister(left), cont);
}
}
VisitWordCompare(selector, node, kX64Cmp32, cont);
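
VisitWord32EqualImpl now accepts either a CompressedHeapConstant or a plain HeapConstant on the right-hand side before consulting the roots table, since a 32-bit comparison treats both the same; when the constant is a root it is compared through a fixed offset from the root register instead of being materialized. A simplified model of that fast path (the helper name is a hypothetical placeholder, not V8's API):

#include <cstdint>
#include <cstring>

// Models "cmp dword ptr [kRootRegister + offset], value": comparing against a
// root needs only an offset from the dedicated root register, never a load of
// the constant itself.
bool CompareAgainstRootSlot(const uint8_t* root_register, int32_t offset,
                            uint32_t value) {
  uint32_t slot;
  std::memcpy(&slot, root_register + offset, sizeof(slot));
  return slot == value;
}
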
@@ -2674,6 +2700,7 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4MinU) \
V(I32x4MaxU) \
V(I32x4GeU) \
+ V(I32x4DotI16x8S) \
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSaturateS) \
@@ -2766,16 +2793,16 @@ VISIT_ATOMIC_BINOP(Xor)
V(I8x16ShrU)
#define SIMD_ANYTRUE_LIST(V) \
- V(S1x2AnyTrue) \
- V(S1x4AnyTrue) \
- V(S1x8AnyTrue) \
- V(S1x16AnyTrue)
+ V(V64x2AnyTrue) \
+ V(V32x4AnyTrue) \
+ V(V16x8AnyTrue) \
+ V(V8x16AnyTrue)
#define SIMD_ALLTRUE_LIST(V) \
- V(S1x2AllTrue) \
- V(S1x4AllTrue) \
- V(S1x8AllTrue) \
- V(S1x16AllTrue)
+ V(V64x2AllTrue) \
+ V(V32x4AllTrue) \
+ V(V16x8AllTrue) \
+ V(V8x16AllTrue)
void InstructionSelector::VisitS128Zero(Node* node) {
X64OperandGenerator g(this);