diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
tree | e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/v8/src/compiler/backend/ia32 | |
parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
download | qtwebengine-chromium-85-based.tar.gz |
BASELINE: Update Chromium to 85.0.4183.140
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/v8/src/compiler/backend/ia32')
4 files changed, 92 insertions, 37 deletions
diff --git a/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc b/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc index c673458c753..f5a69eec3ea 100644 --- a/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc +++ b/chromium/v8/src/compiler/backend/ia32/code-generator-ia32.cc @@ -2032,6 +2032,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Maxpd(dst, dst, i.InputSimd128Register(1)); break; } + case kIA32F64x2Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode); + break; + } case kIA32I64x2SplatI32Pair: { XMMRegister dst = i.OutputSimd128Register(); __ Pinsrd(dst, i.InputRegister(0), 0); @@ -2442,6 +2448,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Maxps(dst, dst, i.InputSimd128Register(1)); break; } + case kIA32F32x4Round: { + RoundingMode const mode = + static_cast<RoundingMode>(MiscField::decode(instr->opcode())); + __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode); + break; + } case kIA32I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); @@ -2795,6 +2807,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0)); break; } + case kIA32I32x4DotI16x8S: { + __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0), + i.InputSimd128Register(1)); + break; + } case kIA32I16x8Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); @@ -3687,7 +3704,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // Out-of-range indices should return 0, add 112 so that any value > 15 // saturates to 128 (top bit set), so pshufb will zero that lane. 
- __ Move(mask, (uint32_t)0x70707070); + __ Move(mask, uint32_t{0x70707070}); __ Pshufd(mask, mask, 0x0); __ Paddusb(mask, i.InputSimd128Register(1)); __ Pshufb(dst, mask); @@ -4094,9 +4111,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ vpor(dst, dst, kScratchDoubleReg); break; } - case kIA32S1x4AnyTrue: - case kIA32S1x8AnyTrue: - case kIA32S1x16AnyTrue: { + case kIA32V32x4AnyTrue: + case kIA32V16x8AnyTrue: + case kIA32V8x16AnyTrue: { Register dst = i.OutputRegister(); XMMRegister src = i.InputSimd128Register(0); Register tmp = i.TempRegister(0); @@ -4110,13 +4127,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1 // respectively. - case kIA32S1x4AllTrue: + case kIA32V32x4AllTrue: ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd); break; - case kIA32S1x8AllTrue: + case kIA32V16x8AllTrue: ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw); break; - case kIA32S1x16AllTrue: { + case kIA32V8x16AllTrue: { ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb); break; } diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h b/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h index d347d672021..4c49539c4e9 100644 --- a/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h +++ b/chromium/v8/src/compiler/backend/ia32/instruction-codes-ia32.h @@ -136,6 +136,7 @@ namespace compiler { V(IA32F64x2Le) \ V(IA32F64x2Pmin) \ V(IA32F64x2Pmax) \ + V(IA32F64x2Round) \ V(IA32I64x2SplatI32Pair) \ V(IA32I64x2ReplaceLaneI32Pair) \ V(IA32I64x2Neg) \ @@ -186,6 +187,7 @@ namespace compiler { V(AVXF32x4Le) \ V(IA32F32x4Pmin) \ V(IA32F32x4Pmax) \ + V(IA32F32x4Round) \ V(IA32I32x4Splat) \ V(IA32I32x4ExtractLane) \ V(SSEI32x4ReplaceLane) \ @@ -232,6 +234,7 @@ namespace compiler { V(AVXI32x4GeU) \ V(IA32I32x4Abs) \ V(IA32I32x4BitMask) \ + V(IA32I32x4DotI16x8S) \ V(IA32I16x8Splat) \ V(IA32I16x8ExtractLaneU) \ 
V(IA32I16x8ExtractLaneS) \ @@ -396,12 +399,12 @@ namespace compiler { V(AVXS8x4Reverse) \ V(SSES8x2Reverse) \ V(AVXS8x2Reverse) \ - V(IA32S1x4AnyTrue) \ - V(IA32S1x4AllTrue) \ - V(IA32S1x8AnyTrue) \ - V(IA32S1x8AllTrue) \ - V(IA32S1x16AnyTrue) \ - V(IA32S1x16AllTrue) \ + V(IA32V32x4AnyTrue) \ + V(IA32V32x4AllTrue) \ + V(IA32V16x8AnyTrue) \ + V(IA32V16x8AllTrue) \ + V(IA32V8x16AnyTrue) \ + V(IA32V8x16AllTrue) \ V(IA32Word32AtomicPairLoad) \ V(IA32Word32AtomicPairStore) \ V(IA32Word32AtomicPairAdd) \ diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc index 52f0b0356ff..6d0062ba09e 100644 --- a/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc +++ b/chromium/v8/src/compiler/backend/ia32/instruction-scheduler-ia32.cc @@ -117,6 +117,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kIA32F64x2Le: case kIA32F64x2Pmin: case kIA32F64x2Pmax: + case kIA32F64x2Round: case kIA32I64x2SplatI32Pair: case kIA32I64x2ReplaceLaneI32Pair: case kIA32I64x2Neg: @@ -167,6 +168,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXF32x4Le: case kIA32F32x4Pmin: case kIA32F32x4Pmax: + case kIA32F32x4Round: case kIA32I32x4Splat: case kIA32I32x4ExtractLane: case kSSEI32x4ReplaceLane: @@ -213,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXI32x4GeU: case kIA32I32x4Abs: case kIA32I32x4BitMask: + case kIA32I32x4DotI16x8S: case kIA32I16x8Splat: case kIA32I16x8ExtractLaneU: case kIA32I16x8ExtractLaneS: @@ -367,12 +370,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kAVXS8x4Reverse: case kSSES8x2Reverse: case kAVXS8x2Reverse: - case kIA32S1x4AnyTrue: - case kIA32S1x4AllTrue: - case kIA32S1x8AnyTrue: - case kIA32S1x8AllTrue: - case kIA32S1x16AnyTrue: - case kIA32S1x16AllTrue: + case kIA32V32x4AnyTrue: + case kIA32V32x4AllTrue: + case kIA32V16x8AnyTrue: + case kIA32V16x8AllTrue: + case kIA32V8x16AnyTrue: + case 
kIA32V8x16AllTrue: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; diff --git a/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc b/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc index c50464f4b86..5ed7c24e6bf 100644 --- a/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc +++ b/chromium/v8/src/compiler/backend/ia32/instruction-selector-ia32.cc @@ -277,6 +277,23 @@ void VisitRRSimd(InstructionSelector* selector, Node* node, } } +// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be +// a register as we don't have memory alignment yet. For AVX, memory operands +// are fine, but can have performance issues if not aligned to 16/32 bytes +// (based on load size), see SDM Vol 1, chapter 14.9 +void VisitRROSimd(InstructionSelector* selector, Node* node, + ArchOpcode avx_opcode, ArchOpcode sse_opcode) { + IA32OperandGenerator g(selector); + InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); + if (selector->IsSupported(AVX)) { + selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, + g.Use(node->InputAt(1))); + } else { + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, + g.UseRegister(node->InputAt(1))); + } +} + void VisitRRISimd(InstructionSelector* selector, Node* node, ArchOpcode opcode) { IA32OperandGenerator g(selector); @@ -941,7 +958,16 @@ void InstructionSelector::VisitWord32Ror(Node* node) { V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \ V(Float32RoundTiesEven, \ kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \ - V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest)) + V(Float64RoundTiesEven, \ + kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \ + V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \ + V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \ + V(F32x4Trunc, kIA32F32x4Round | 
MiscField::encode(kRoundToZero)) \ + V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \ + V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \ + V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \ + V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \ + V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest)) #define RRO_FLOAT_OP_LIST(V) \ V(Float32Add, kAVXFloat32Add, kSSEFloat32Add) \ @@ -2100,6 +2126,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { #define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \ V(I64x2Add) \ V(I64x2Sub) \ + V(I32x4DotI16x8S) \ V(I16x8RoundingAverageU) \ V(I8x16RoundingAverageU) @@ -2131,14 +2158,14 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { V(S128Not) #define SIMD_ANYTRUE_LIST(V) \ - V(S1x4AnyTrue) \ - V(S1x8AnyTrue) \ - V(S1x16AnyTrue) + V(V32x4AnyTrue) \ + V(V16x8AnyTrue) \ + V(V8x16AnyTrue) #define SIMD_ALLTRUE_LIST(V) \ - V(S1x4AllTrue) \ - V(S1x8AllTrue) \ - V(S1x16AllTrue) + V(V32x4AllTrue) \ + V(V16x8AllTrue) \ + V(V8x16AllTrue) #define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \ V(I64x2Shl) \ @@ -2372,10 +2399,15 @@ SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX) #undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX #undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX -#define VISIT_SIMD_UNOP(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - IA32OperandGenerator g(this); \ - Emit(kIA32##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \ +// TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory +// alignment yet. 
For AVX, memory operands are fine, but can have performance +// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1, +// chapter 14.9 +#define VISIT_SIMD_UNOP(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + IA32OperandGenerator g(this); \ + Emit(kIA32##Opcode, g.DefineAsRegister(node), \ + g.UseRegister(node->InputAt(0))); \ } SIMD_UNOP_LIST(VISIT_SIMD_UNOP) #undef VISIT_SIMD_UNOP @@ -2407,23 +2439,23 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) IA32OperandGenerator g(this); \ InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ Emit(kIA32##Opcode, g.DefineAsRegister(node), \ - g.UseUnique(node->InputAt(0)), arraysize(temps), temps); \ + g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ } SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) #undef VISIT_SIMD_ALLTRUE #undef SIMD_ALLTRUE_LIST -#define VISIT_SIMD_BINOP(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - VisitRROFloat(this, node, kAVX##Opcode, kSSE##Opcode); \ +#define VISIT_SIMD_BINOP(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \ } SIMD_BINOP_LIST(VISIT_SIMD_BINOP) #undef VISIT_SIMD_BINOP #undef SIMD_BINOP_LIST -#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - VisitRROFloat(this, node, kIA32##Opcode, kIA32##Opcode); \ +#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \ } SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX) #undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX |