diff options
Diffstat (limited to 'chromium/v8/src/compiler/backend/x64')
4 files changed, 208 insertions, 77 deletions
diff --git a/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc b/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc index 4f99ad49ba8..110a478c543 100644 --- a/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/chromium/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -194,6 +194,94 @@ class OutOfLineLoadFloat64NaN final : public OutOfLineCode { XMMRegister const result_; }; +class OutOfLineF32x4Min final : public OutOfLineCode { + public: + OutOfLineF32x4Min(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // propagate -0's and NaNs (possibly non-canonical) from the error. + __ Orps(error_, result_); + // Canonicalize NaNs by quieting and clearing the payload. + __ Cmpps(result_, error_, int8_t{3}); + __ Orps(error_, result_); + __ Psrld(result_, byte{10}); + __ Andnps(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + +class OutOfLineF64x2Min final : public OutOfLineCode { + public: + OutOfLineF64x2Min(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // propagate -0's and NaNs (possibly non-canonical) from the error. + __ Orpd(error_, result_); + // Canonicalize NaNs by quieting and clearing the payload. + __ Cmppd(result_, error_, int8_t{3}); + __ Orpd(error_, result_); + __ Psrlq(result_, 13); + __ Andnpd(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + +class OutOfLineF32x4Max final : public OutOfLineCode { + public: + OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // Propagate NaNs (possibly non-canonical). + __ Orps(result_, error_); + // Propagate sign errors and (subtle) quiet NaNs. + __ Subps(result_, error_); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ Cmpps(error_, result_, int8_t{3}); + __ Psrld(error_, byte{10}); + __ Andnps(error_, result_); + __ Movaps(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + +class OutOfLineF64x2Max final : public OutOfLineCode { + public: + OutOfLineF64x2Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) + : OutOfLineCode(gen), result_(result), error_(error) {} + + void Generate() final { + // |result| is the partial result, |kScratchDoubleReg| is the error. + // Propagate NaNs (possibly non-canonical). + __ Orpd(result_, error_); + // Propagate sign errors and (subtle) quiet NaNs. + __ Subpd(result_, error_); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ Cmppd(error_, result_, int8_t{3}); + __ Psrlq(error_, byte{13}); + __ Andnpd(error_, result_); + __ Movapd(result_, error_); + } + + private: + XMMRegister const result_; + XMMRegister const error_; +}; + class OutOfLineTruncateDoubleToI final : public OutOfLineCode { public: OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result, @@ -2328,18 +2416,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - // The minpd instruction doesn't propagate NaNs and +0's in its first - // operand. Perform minpd in both orders, merge the resuls, and adjust. + // The minpd instruction doesn't propagate NaNs and -0's in its first + // operand. Perform minpd in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movapd(kScratchDoubleReg, src1); __ Minpd(kScratchDoubleReg, dst); __ Minpd(dst, src1); - // propagate -0's and NaNs, which may be non-canonical. - __ Orpd(kScratchDoubleReg, dst); - // Canonicalize NaNs by quieting and clearing the payload. - __ Cmppd(dst, kScratchDoubleReg, int8_t{3}); - __ Orpd(kScratchDoubleReg, dst); - __ Psrlq(dst, 13); - __ Andnpd(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorpd(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF64x2Min(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F64x2Max: { @@ -2347,20 +2435,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); // The maxpd instruction doesn't propagate NaNs and +0's in its first - // operand. Perform maxpd in both orders, merge the resuls, and adjust. + // operand. Perform maxpd in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movapd(kScratchDoubleReg, src1); __ Maxpd(kScratchDoubleReg, dst); __ Maxpd(dst, src1); - // Find discrepancies. - __ Xorpd(dst, kScratchDoubleReg); - // Propagate NaNs, which may be non-canonical. - __ Orpd(kScratchDoubleReg, dst); - // Propagate sign discrepancy and (subtle) quiet NaNs. - __ Subpd(kScratchDoubleReg, dst); - // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. - __ Cmppd(dst, kScratchDoubleReg, int8_t{3}); - __ Psrlq(dst, 13); - __ Andnpd(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorpd(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF64x2Max(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F64x2Eq: { @@ -2524,18 +2609,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - // The minps instruction doesn't propagate NaNs and +0's in its first - // operand. Perform minps in both orders, merge the resuls, and adjust. + // The minps instruction doesn't propagate NaNs and -0's in its first + // operand. Perform minps in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movaps(kScratchDoubleReg, src1); __ Minps(kScratchDoubleReg, dst); __ Minps(dst, src1); - // propagate -0's and NaNs, which may be non-canonical. - __ Orps(kScratchDoubleReg, dst); - // Canonicalize NaNs by quieting and clearing the payload. - __ Cmpps(dst, kScratchDoubleReg, int8_t{3}); - __ Orps(kScratchDoubleReg, dst); - __ Psrld(dst, byte{10}); - __ Andnps(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorps(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF32x4Min(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F32x4Max: { @@ -2543,20 +2628,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); // The maxps instruction doesn't propagate NaNs and +0's in its first - // operand. Perform maxps in both orders, merge the resuls, and adjust. + // operand. Perform maxps in both orders and compare results. Handle the + // unlikely case of discrepancies out of line. __ Movaps(kScratchDoubleReg, src1); __ Maxps(kScratchDoubleReg, dst); __ Maxps(dst, src1); - // Find discrepancies. - __ Xorps(dst, kScratchDoubleReg); - // Propagate NaNs, which may be non-canonical. - __ Orps(kScratchDoubleReg, dst); - // Propagate sign discrepancy and (subtle) quiet NaNs. - __ Subps(kScratchDoubleReg, dst); - // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. - __ Cmpps(dst, kScratchDoubleReg, int8_t{3}); - __ Psrld(dst, byte{10}); - __ Andnps(dst, kScratchDoubleReg); + // Most likely there is no difference and we're done. + __ Xorps(kScratchDoubleReg, dst); + __ Ptest(kScratchDoubleReg, kScratchDoubleReg); + auto ool = new (zone()) OutOfLineF32x4Max(this, dst, kScratchDoubleReg); + __ j(not_zero, ool->entry()); + __ bind(ool->exit()); break; } case kX64F32x4Eq: { @@ -2619,6 +2701,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Maxps(dst, i.InputSimd128Register(1)); break; } + case kX64F32x4Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode); + break; + } + case kX64F64x2Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode); + break; + } case kX64F64x2Pmin: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); @@ -3093,6 +3187,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0)); break; } + case kX64I32x4DotI16x8S: { + __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } case kX64S128Zero: { XMMRegister dst = i.OutputSimd128Register(); __ Xorps(dst, dst); @@ -3926,10 +4024,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Por(dst, kScratchDoubleReg); break; } - case kX64S1x2AnyTrue: - case kX64S1x4AnyTrue: - case kX64S1x8AnyTrue: - case kX64S1x16AnyTrue: { + case kX64V64x2AnyTrue: + case kX64V32x4AnyTrue: + case kX64V16x8AnyTrue: + case kX64V8x16AnyTrue: { Register dst = i.OutputRegister(); XMMRegister src = i.InputSimd128Register(0); @@ -3942,19 +4040,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1 // respectively. - case kX64S1x2AllTrue: { + case kX64V64x2AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq); break; } - case kX64S1x4AllTrue: { + case kX64V32x4AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd); break; } - case kX64S1x8AllTrue: { + case kX64V16x8AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw); break; } - case kX64S1x16AllTrue: { + case kX64V8x16AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb); break; } diff --git a/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h b/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h index 745f5c6cb25..ed7d2060f59 100644 --- a/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h +++ b/chromium/v8/src/compiler/backend/x64/instruction-codes-x64.h @@ -174,6 +174,7 @@ namespace compiler { V(X64F64x2Qfms) \ V(X64F64x2Pmin) \ V(X64F64x2Pmax) \ + V(X64F64x2Round) \ V(X64F32x4Splat) \ V(X64F32x4ExtractLane) \ V(X64F32x4ReplaceLane) \ @@ -199,6 +200,7 @@ namespace compiler { V(X64F32x4Qfms) \ V(X64F32x4Pmin) \ V(X64F32x4Pmax) \ + V(X64F32x4Round) \ V(X64I64x2Splat) \ V(X64I64x2ExtractLane) \ V(X64I64x2ReplaceLane) \ @@ -248,6 +250,7 @@ namespace compiler { V(X64I32x4GeU) \ V(X64I32x4Abs) \ V(X64I32x4BitMask) \ + V(X64I32x4DotI16x8S) \ V(X64I16x8Splat) \ V(X64I16x8ExtractLaneU) \ V(X64I16x8ExtractLaneS) \ @@ -357,14 +360,14 @@ namespace compiler { V(X64S8x8Reverse) \ V(X64S8x4Reverse) \ V(X64S8x2Reverse) \ - V(X64S1x2AnyTrue) \ - V(X64S1x2AllTrue) \ - V(X64S1x4AnyTrue) \ - V(X64S1x4AllTrue) \ - V(X64S1x8AnyTrue) \ - V(X64S1x8AllTrue) \ - V(X64S1x16AnyTrue) \ - V(X64S1x16AllTrue) \ + V(X64V64x2AnyTrue) \ + V(X64V64x2AllTrue) \ + V(X64V32x4AnyTrue) \ + V(X64V32x4AllTrue) \ + V(X64V16x8AnyTrue) \ + V(X64V16x8AllTrue) \ + V(X64V8x16AnyTrue) \ + V(X64V8x16AllTrue) \ V(X64Word64AtomicLoadUint8) \ V(X64Word64AtomicLoadUint16) \ V(X64Word64AtomicLoadUint32) \ diff --git a/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc index d2c1d14855c..395c4a4e9c7 100644 --- a/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/chromium/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -146,6 +146,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F64x2Qfms: case kX64F64x2Pmin: case kX64F64x2Pmax: + case kX64F64x2Round: case kX64F32x4Splat: case kX64F32x4ExtractLane: case kX64F32x4ReplaceLane: @@ -171,6 +172,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F32x4Qfms: case kX64F32x4Pmin: case kX64F32x4Pmax: + case kX64F32x4Round: case kX64I64x2Splat: case kX64I64x2ExtractLane: case kX64I64x2ReplaceLane: @@ -220,6 +222,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64I32x4GeU: case kX64I32x4Abs: case kX64I32x4BitMask: + case kX64I32x4DotI16x8S: case kX64I16x8Splat: case kX64I16x8ExtractLaneU: case kX64I16x8ExtractLaneS: @@ -292,12 +295,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64S128Select: case kX64S128Zero: case kX64S128AndNot: - case kX64S1x2AnyTrue: - case kX64S1x2AllTrue: - case kX64S1x4AnyTrue: - case kX64S1x4AllTrue: - case kX64S1x8AnyTrue: - case kX64S1x8AllTrue: + case kX64V64x2AnyTrue: + case kX64V64x2AllTrue: + case kX64V32x4AnyTrue: + case kX64V32x4AllTrue: + case kX64V16x8AnyTrue: + case kX64V16x8AllTrue: case kX64S8x16Swizzle: case kX64S8x16Shuffle: case kX64S32x4Swizzle: @@ -325,8 +328,8 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64S8x8Reverse: case kX64S8x4Reverse: case kX64S8x2Reverse: - case kX64S1x16AnyTrue: - case kX64S1x16AllTrue: + case kX64V8x16AnyTrue: + case kX64V8x16AllTrue: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; diff --git a/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc index dd3f556937d..ab669864954 100644 --- a/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/chromium/v8/src/compiler/backend/x64/instruction-selector-x64.cc @@ -1461,7 +1461,16 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \ V(Float32RoundTiesEven, \ kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \ - V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest)) + V(Float64RoundTiesEven, \ + kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \ + V(F32x4Ceil, kX64F32x4Round | MiscField::encode(kRoundUp)) \ + V(F32x4Floor, kX64F32x4Round | MiscField::encode(kRoundDown)) \ + V(F32x4Trunc, kX64F32x4Round | MiscField::encode(kRoundToZero)) \ + V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest)) \ + V(F64x2Ceil, kX64F64x2Round | MiscField::encode(kRoundUp)) \ + V(F64x2Floor, kX64F64x2Round | MiscField::encode(kRoundDown)) \ + V(F64x2Trunc, kX64F64x2Round | MiscField::encode(kRoundToZero)) \ + V(F64x2NearestInt, kX64F64x2Round | MiscField::encode(kRoundToNearest)) #define RO_VISITOR(Name, opcode) \ void InstructionSelector::Visit##Name(Node* node) { \ @@ -1898,16 +1907,33 @@ void VisitWord32EqualImpl(InstructionSelector* selector, Node* node, X64OperandGenerator g(selector); const RootsTable& roots_table = selector->isolate()->roots_table(); RootIndex root_index; - CompressedHeapObjectBinopMatcher m(node); - if (m.right().HasValue() && - roots_table.IsRootHandle(m.right().Value(), &root_index)) { + Node* left = nullptr; + Handle<HeapObject> right; + // HeapConstants and CompressedHeapConstants can be treated the same when + // using them as an input to a 32-bit comparison. Check whether either is + // present. + { + CompressedHeapObjectBinopMatcher m(node); + if (m.right().HasValue()) { + left = m.left().node(); + right = m.right().Value(); + } else { + HeapObjectBinopMatcher m2(node); + if (m2.right().HasValue()) { + left = m2.left().node(); + right = m2.right().Value(); + } + } + } + if (!right.is_null() && roots_table.IsRootHandle(right, &root_index)) { + DCHECK_NE(left, nullptr); InstructionCode opcode = kX64Cmp32 | AddressingModeField::encode(kMode_Root); return VisitCompare( selector, opcode, g.TempImmediate( TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)), - g.UseRegister(m.left().node()), cont); + g.UseRegister(left), cont); } } VisitWordCompare(selector, node, kX64Cmp32, cont); @@ -2674,6 +2700,7 @@ VISIT_ATOMIC_BINOP(Xor) V(I32x4MinU) \ V(I32x4MaxU) \ V(I32x4GeU) \ + V(I32x4DotI16x8S) \ V(I16x8SConvertI32x4) \ V(I16x8Add) \ V(I16x8AddSaturateS) \ @@ -2766,16 +2793,16 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16ShrU) #define SIMD_ANYTRUE_LIST(V) \ - V(S1x2AnyTrue) \ - V(S1x4AnyTrue) \ - V(S1x8AnyTrue) \ - V(S1x16AnyTrue) + V(V64x2AnyTrue) \ + V(V32x4AnyTrue) \ + V(V16x8AnyTrue) \ + V(V8x16AnyTrue) #define SIMD_ALLTRUE_LIST(V) \ - V(S1x2AllTrue) \ - V(S1x4AllTrue) \ - V(S1x8AllTrue) \ - V(S1x16AllTrue) + V(V64x2AllTrue) \ + V(V32x4AllTrue) \ + V(V16x8AllTrue) \ + V(V8x16AllTrue) void InstructionSelector::VisitS128Zero(Node* node) { X64OperandGenerator g(this); |