diff options
Diffstat (limited to 'deps/v8/src/codegen/s390/macro-assembler-s390.cc')
-rw-r--r-- | deps/v8/src/codegen/s390/macro-assembler-s390.cc | 998 |
1 files changed, 774 insertions, 224 deletions
diff --git a/deps/v8/src/codegen/s390/macro-assembler-s390.cc b/deps/v8/src/codegen/s390/macro-assembler-s390.cc index 7080e89eec..79a0130de2 100644 --- a/deps/v8/src/codegen/s390/macro-assembler-s390.cc +++ b/deps/v8/src/codegen/s390/macro-assembler-s390.cc @@ -284,19 +284,10 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, Register exclusion2, Register exclusion3) const { int bytes = 0; - RegList exclusions = 0; - if (exclusion1 != no_reg) { - exclusions |= exclusion1.bit(); - if (exclusion2 != no_reg) { - exclusions |= exclusion2.bit(); - if (exclusion3 != no_reg) { - exclusions |= exclusion3.bit(); - } - } - } - RegList list = kJSCallerSaved & ~exclusions; - bytes += NumRegs(list) * kSystemPointerSize; + RegList exclusions = {exclusion1, exclusion2, exclusion3}; + RegList list = kJSCallerSaved - exclusions; + bytes += list.Count() * kSystemPointerSize; if (fp_mode == SaveFPRegsMode::kSave) { bytes += kStackSavedSavedFPSizeInBytes; @@ -308,20 +299,11 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, Register exclusion2, Register exclusion3) { int bytes = 0; - RegList exclusions = 0; - if (exclusion1 != no_reg) { - exclusions |= exclusion1.bit(); - if (exclusion2 != no_reg) { - exclusions |= exclusion2.bit(); - if (exclusion3 != no_reg) { - exclusions |= exclusion3.bit(); - } - } - } - RegList list = kJSCallerSaved & ~exclusions; + RegList exclusions = {exclusion1, exclusion2, exclusion3}; + RegList list = kJSCallerSaved - exclusions; MultiPush(list); - bytes += NumRegs(list) * kSystemPointerSize; + bytes += list.Count() * kSystemPointerSize; if (fp_mode == SaveFPRegsMode::kSave) { MultiPushF64OrV128(kCallerSavedDoubles); @@ -339,20 +321,10 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, bytes += kStackSavedSavedFPSizeInBytes; } - RegList exclusions = 0; - if (exclusion1 != no_reg) { - exclusions |= exclusion1.bit(); - if (exclusion2 != no_reg) { - exclusions |= exclusion2.bit(); - if (exclusion3 != no_reg) { - exclusions |= exclusion3.bit(); - } - } - } - - RegList list = kJSCallerSaved & ~exclusions; + RegList exclusions = {exclusion1, exclusion2, exclusion3}; + RegList list = kJSCallerSaved - exclusions; MultiPop(list); - bytes += NumRegs(list) * kSystemPointerSize; + bytes += list.Count() * kSystemPointerSize; return bytes; } @@ -578,6 +550,10 @@ void TurboAssembler::Move(DoubleRegister dst, DoubleRegister src) { } } +void TurboAssembler::Move(Register dst, const MemOperand& src) { + LoadU64(dst, src); +} + // Wrapper around Assembler::mvc (SS-a format) void TurboAssembler::MoveChar(const MemOperand& opnd1, const MemOperand& opnd2, const Operand& length) { @@ -653,12 +629,12 @@ void TurboAssembler::PushArray(Register array, Register size, Register scratch, } void TurboAssembler::MultiPush(RegList regs, Register location) { - int16_t num_to_push = base::bits::CountPopulation(regs); + int16_t num_to_push = regs.Count(); int16_t stack_offset = num_to_push * kSystemPointerSize; SubS64(location, location, Operand(stack_offset)); for (int16_t i = Register::kNumRegisters - 1; i >= 0; i--) { - if ((regs & (1 << i)) != 0) { + if ((regs.bits() & (1 << i)) != 0) { stack_offset -= kSystemPointerSize; StoreU64(ToRegister(i), MemOperand(location, stack_offset)); } @@ -669,7 +645,7 @@ void TurboAssembler::MultiPop(RegList regs, Register location) { int16_t stack_offset = 0; for (int16_t i = 0; i < Register::kNumRegisters; i++) { - if ((regs & (1 << i)) != 0) { + if ((regs.bits() & (1 << i)) != 0) { LoadU64(ToRegister(i), MemOperand(location, stack_offset)); stack_offset += kSystemPointerSize; } @@ -677,13 +653,13 @@ void TurboAssembler::MultiPop(RegList regs, Register location) { AddS64(location, location, Operand(stack_offset)); } -void TurboAssembler::MultiPushDoubles(RegList dregs, Register location) { - int16_t num_to_push = base::bits::CountPopulation(dregs); +void TurboAssembler::MultiPushDoubles(DoubleRegList dregs, Register location) { + int16_t num_to_push = dregs.Count(); int16_t stack_offset = num_to_push * kDoubleSize; SubS64(location, location, Operand(stack_offset)); for (int16_t i = DoubleRegister::kNumRegisters - 1; i >= 0; i--) { - if ((dregs & (1 << i)) != 0) { + if ((dregs.bits() & (1 << i)) != 0) { DoubleRegister dreg = DoubleRegister::from_code(i); stack_offset -= kDoubleSize; StoreF64(dreg, MemOperand(location, stack_offset)); @@ -691,13 +667,13 @@ void TurboAssembler::MultiPushDoubles(RegList dregs, Register location) { } } -void TurboAssembler::MultiPushV128(RegList dregs, Register location) { - int16_t num_to_push = base::bits::CountPopulation(dregs); +void TurboAssembler::MultiPushV128(DoubleRegList dregs, Register location) { + int16_t num_to_push = dregs.Count(); int16_t stack_offset = num_to_push * kSimd128Size; SubS64(location, location, Operand(stack_offset)); for (int16_t i = Simd128Register::kNumRegisters - 1; i >= 0; i--) { - if ((dregs & (1 << i)) != 0) { + if ((dregs.bits() & (1 << i)) != 0) { Simd128Register dreg = Simd128Register::from_code(i); stack_offset -= kSimd128Size; StoreV128(dreg, MemOperand(location, stack_offset), r0); @@ -705,11 +681,11 @@ void TurboAssembler::MultiPushV128(RegList dregs, Register location) { } } -void TurboAssembler::MultiPopDoubles(RegList dregs, Register location) { +void TurboAssembler::MultiPopDoubles(DoubleRegList dregs, Register location) { int16_t stack_offset = 0; for (int16_t i = 0; i < DoubleRegister::kNumRegisters; i++) { - if ((dregs & (1 << i)) != 0) { + if ((dregs.bits() & (1 << i)) != 0) { DoubleRegister dreg = DoubleRegister::from_code(i); LoadF64(dreg, MemOperand(location, stack_offset)); stack_offset += kDoubleSize; @@ -718,11 +694,11 @@ void TurboAssembler::MultiPopDoubles(RegList dregs, Register location) { AddS64(location, location, Operand(stack_offset)); } -void TurboAssembler::MultiPopV128(RegList dregs, Register location) { +void TurboAssembler::MultiPopV128(DoubleRegList dregs, Register location) { int16_t stack_offset = 0; for (int16_t i = 0; i < Simd128Register::kNumRegisters; i++) { - if ((dregs & (1 << i)) != 0) { + if ((dregs.bits() & (1 << i)) != 0) { Simd128Register dreg = Simd128Register::from_code(i); LoadV128(dreg, MemOperand(location, stack_offset), r0); stack_offset += kSimd128Size; @@ -731,7 +707,8 @@ void TurboAssembler::MultiPopV128(RegList dregs, Register location) { AddS64(location, location, Operand(stack_offset)); } -void TurboAssembler::MultiPushF64OrV128(RegList dregs, Register location) { +void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs, + Register location) { #if V8_ENABLE_WEBASSEMBLY bool generating_bultins = isolate() && isolate()->IsGeneratingEmbeddedBuiltins(); @@ -749,14 +726,14 @@ void TurboAssembler::MultiPushF64OrV128(RegList dregs, Register location) { MultiPushDoubles(dregs); // We still need to allocate empty space on the stack as if // Simd rgeisters were saved (see kFixedFrameSizeFromFp). - lay(sp, MemOperand(sp, -(NumRegs(dregs) * kDoubleSize))); + lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize))); bind(&simd_pushed); } else { if (CpuFeatures::SupportsWasmSimd128()) { MultiPushV128(dregs); } else { MultiPushDoubles(dregs); - lay(sp, MemOperand(sp, -(NumRegs(dregs) * kDoubleSize))); + lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize))); } } #else @@ -764,7 +741,7 @@ void TurboAssembler::MultiPushF64OrV128(RegList dregs, Register location) { #endif } -void TurboAssembler::MultiPopF64OrV128(RegList dregs, Register location) { +void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register location) { #if V8_ENABLE_WEBASSEMBLY bool generating_bultins = isolate() && isolate()->IsGeneratingEmbeddedBuiltins(); @@ -779,14 +756,14 @@ void TurboAssembler::MultiPopF64OrV128(RegList dregs, Register location) { b(&simd_popped); bind(&pop_doubles); // Simd not supported, only pop double registers. - lay(sp, MemOperand(sp, NumRegs(dregs) * kDoubleSize)); + lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize)); MultiPopDoubles(dregs); bind(&simd_popped); } else { if (CpuFeatures::SupportsWasmSimd128()) { MultiPopV128(dregs); } else { - lay(sp, MemOperand(sp, NumRegs(dregs) * kDoubleSize)); + lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize)); MultiPopDoubles(dregs); } } @@ -891,6 +868,16 @@ void TurboAssembler::DecompressAnyTagged(Register destination, agr(destination, kRootRegister); RecordComment("]"); } + +void TurboAssembler::LoadTaggedSignedField(Register destination, + MemOperand field_operand) { + if (COMPRESS_POINTERS_BOOL) { + DecompressTaggedSigned(destination, field_operand); + } else { + LoadU64(destination, field_operand); + } +} + void MacroAssembler::RecordWriteField(Register object, int offset, Register value, Register slot_address, LinkRegisterStatus lr_status, @@ -933,25 +920,13 @@ void MacroAssembler::RecordWriteField(Register object, int offset, } void TurboAssembler::MaybeSaveRegisters(RegList registers) { - if (registers == 0) return; - RegList regs = 0; - for (int i = 0; i < Register::kNumRegisters; ++i) { - if ((registers >> i) & 1u) { - regs |= Register::from_code(i).bit(); - } - } - MultiPush(regs); + if (registers.is_empty()) return; + MultiPush(registers); } void TurboAssembler::MaybeRestoreRegisters(RegList registers) { - if (registers == 0) return; - RegList regs = 0; - for (int i = 0; i < Register::kNumRegisters; ++i) { - if ((registers >> i) & 1u) { - regs |= Register::from_code(i).bit(); - } - } - MultiPop(regs); + if (registers.is_empty()) return; + MultiPop(registers); } void TurboAssembler::CallEphemeronKeyBarrier(Register object, @@ -1090,6 +1065,7 @@ void MacroAssembler::RecordWrite(Register object, Register slot_address, } void TurboAssembler::PushCommonFrame(Register marker_reg) { + ASM_CODE_COMMENT(this); int fp_delta = 0; CleanseP(r14); if (marker_reg.is_valid()) { @@ -1443,6 +1419,7 @@ void TurboAssembler::DropArgumentsAndPushNewReceiver(Register argc, void TurboAssembler::EnterFrame(StackFrame::Type type, bool load_constant_pool_pointer_reg) { + ASM_CODE_COMMENT(this); // We create a stack frame with: // Return Addr <-- old sp // Old FP <-- new fp @@ -1450,14 +1427,19 @@ void TurboAssembler::EnterFrame(StackFrame::Type type, // type // CodeObject <-- new sp - mov(ip, Operand(StackFrame::TypeToMarker(type))); - PushCommonFrame(ip); + Register scratch = no_reg; + if (!StackFrame::IsJavaScript(type)) { + scratch = ip; + mov(scratch, Operand(StackFrame::TypeToMarker(type))); + } + PushCommonFrame(scratch); #if V8_ENABLE_WEBASSEMBLY if (type == StackFrame::WASM) Push(kWasmInstanceRegister); #endif // V8_ENABLE_WEBASSEMBLY } int TurboAssembler::LeaveFrame(StackFrame::Type type, int stack_adjustment) { + ASM_CODE_COMMENT(this); // Drop the execution stack down to the frame pointer and restore // the caller frame pointer, return address and constant pool pointer. LoadU64(r14, MemOperand(fp, StandardFrameConstants::kCallerPCOffset)); @@ -1698,11 +1680,7 @@ void MacroAssembler::InvokePrologue(Register expected_parameter_count, lay(dest, MemOperand(dest, kSystemPointerSize)); SubS64(num, num, Operand(1)); bind(&check); - if (kJSArgcIncludesReceiver) { - b(gt, ©); - } else { - b(ge, ©); - } + b(gt, ©); } // Fill remaining expected arguments with undefined values. @@ -2438,19 +2416,13 @@ void TurboAssembler::CheckPageFlag( Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3, Register reg4, Register reg5, Register reg6) { - RegList regs = 0; - if (reg1.is_valid()) regs |= reg1.bit(); - if (reg2.is_valid()) regs |= reg2.bit(); - if (reg3.is_valid()) regs |= reg3.bit(); - if (reg4.is_valid()) regs |= reg4.bit(); - if (reg5.is_valid()) regs |= reg5.bit(); - if (reg6.is_valid()) regs |= reg6.bit(); + RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6}; const RegisterConfiguration* config = RegisterConfiguration::Default(); for (int i = 0; i < config->num_allocatable_general_registers(); ++i) { int code = config->GetAllocatableGeneralCode(i); Register candidate = Register::from_code(code); - if (regs & candidate.bit()) continue; + if (regs.has(candidate)) continue; return candidate; } UNREACHABLE(); @@ -2923,6 +2895,25 @@ void TurboAssembler::AddU64(Register dst, const Operand& imm) { algfi(dst, imm); } +void TurboAssembler::AddU64(Register dst, Register src1, Register src2) { + if (dst != src2 && dst != src1) { + if (CpuFeatures::IsSupported(DISTINCT_OPS)) { + algrk(dst, src1, src2); + } else { + lgr(dst, src1); + algr(dst, src2); + } + } else if (dst != src2) { + // dst == src1 + DCHECK(dst == src1); + algr(dst, src2); + } else { + // dst == src2 + DCHECK(dst == src2); + algr(dst, src1); + } +} + // Add Logical 32-bit (Register-Memory) void TurboAssembler::AddU32(Register dst, const MemOperand& opnd) { DCHECK(is_int20(opnd.offset())); @@ -4736,6 +4727,19 @@ void TurboAssembler::CallBuiltinByIndex(Register builtin_index) { Call(builtin_index); } +void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin, + Register destination) { + ASM_CODE_COMMENT(this); + LoadU64(destination, EntryFromBuiltinAsOperand(builtin)); +} + +MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) { + ASM_CODE_COMMENT(this); + DCHECK(root_array_available()); + return MemOperand(kRootRegister, + IsolateData::BuiltinEntrySlotOffset(builtin)); +} + void TurboAssembler::LoadCodeObjectEntry(Register destination, Register code_object) { // Code objects are called differently depending on whether we are generating @@ -5104,8 +5108,6 @@ void TurboAssembler::AtomicExchangeU16(Register addr, Register value, } // Simd Support. -#define kScratchDoubleReg d13 - void TurboAssembler::F64x2Splat(Simd128Register dst, Simd128Register src) { vrep(dst, src, Operand(0), Condition(3)); } @@ -5135,69 +5137,70 @@ void TurboAssembler::I8x16Splat(Simd128Register dst, Register src) { } void TurboAssembler::F64x2ExtractLane(DoubleRegister dst, Simd128Register src, - uint8_t imm_lane_idx) { + uint8_t imm_lane_idx, Register) { vrep(dst, src, Operand(1 - imm_lane_idx), Condition(3)); } void TurboAssembler::F32x4ExtractLane(DoubleRegister dst, Simd128Register src, - uint8_t imm_lane_idx) { + uint8_t imm_lane_idx, Register) { vrep(dst, src, Operand(3 - imm_lane_idx), Condition(2)); } void TurboAssembler::I64x2ExtractLane(Register dst, Simd128Register src, - uint8_t imm_lane_idx) { + uint8_t imm_lane_idx, Register) { vlgv(dst, src, MemOperand(r0, 1 - imm_lane_idx), Condition(3)); } void TurboAssembler::I32x4ExtractLane(Register dst, Simd128Register src, - uint8_t imm_lane_idx) { + uint8_t imm_lane_idx, Register) { vlgv(dst, src, MemOperand(r0, 3 - imm_lane_idx), Condition(2)); } void TurboAssembler::I16x8ExtractLaneU(Register dst, Simd128Register src, - uint8_t imm_lane_idx) { + uint8_t imm_lane_idx, Register) { vlgv(dst, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1)); } void TurboAssembler::I16x8ExtractLaneS(Register dst, Simd128Register src, - uint8_t imm_lane_idx) { - vlgv(r0, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1)); - lghr(dst, r0); + uint8_t imm_lane_idx, Register scratch) { + vlgv(scratch, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1)); + lghr(dst, scratch); } void TurboAssembler::I8x16ExtractLaneU(Register dst, Simd128Register src, - uint8_t imm_lane_idx) { + uint8_t imm_lane_idx, Register) { vlgv(dst, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0)); } void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src, - uint8_t imm_lane_idx) { - vlgv(r0, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0)); - lgbr(dst, r0); + uint8_t imm_lane_idx, Register scratch) { + vlgv(scratch, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0)); + lgbr(dst, scratch); } void TurboAssembler::F64x2ReplaceLane(Simd128Register dst, Simd128Register src1, - DoubleRegister src2, - uint8_t imm_lane_idx) { - vlgv(r0, src2, MemOperand(r0, 0), Condition(3)); + DoubleRegister src2, uint8_t imm_lane_idx, + Register scratch) { + vlgv(scratch, src2, MemOperand(r0, 0), Condition(3)); if (src1 != dst) { vlr(dst, src1, Condition(0), Condition(0), Condition(0)); } - vlvg(dst, r0, MemOperand(r0, 1 - imm_lane_idx), Condition(3)); + vlvg(dst, scratch, MemOperand(r0, 1 - imm_lane_idx), Condition(3)); } void TurboAssembler::F32x4ReplaceLane(Simd128Register dst, Simd128Register src1, - DoubleRegister src2, - uint8_t imm_lane_idx) { - vlgv(r0, src2, MemOperand(r0, 0), Condition(2)); + DoubleRegister src2, uint8_t imm_lane_idx, + Register scratch) { + vlgv(scratch, src2, MemOperand(r0, 0), Condition(2)); if (src1 != dst) { vlr(dst, src1, Condition(0), Condition(0), Condition(0)); } - vlvg(dst, r0, MemOperand(r0, 3 - imm_lane_idx), Condition(2)); + vlvg(dst, scratch, MemOperand(r0, 3 - imm_lane_idx), Condition(2)); } void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1, - Register src2, uint8_t imm_lane_idx) { + Register src2, uint8_t imm_lane_idx, + Register) { if (src1 != dst) { vlr(dst, src1, Condition(0), Condition(0), Condition(0)); } @@ -5205,7 +5208,8 @@ void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1, } void TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1, - Register src2, uint8_t imm_lane_idx) { + Register src2, uint8_t imm_lane_idx, + Register) { if (src1 != dst) { vlr(dst, src1, Condition(0), Condition(0), Condition(0)); } @@ -5213,7 +5217,8 @@ void TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1, } void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1, - Register src2, uint8_t imm_lane_idx) { + Register src2, uint8_t imm_lane_idx, + Register) { if (src1 != dst) { vlr(dst, src1, Condition(0), Condition(0), Condition(0)); } @@ -5221,13 +5226,76 @@ void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1, } void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1, - Register src2, uint8_t imm_lane_idx) { + Register src2, uint8_t imm_lane_idx, + Register) { if (src1 != dst) { vlr(dst, src1, Condition(0), Condition(0), Condition(0)); } vlvg(dst, src2, MemOperand(r0, 15 - imm_lane_idx), Condition(0)); } +void TurboAssembler::S128Not(Simd128Register dst, Simd128Register src) { + vno(dst, src, src, Condition(0), Condition(0), Condition(0)); +} + +void TurboAssembler::S128Zero(Simd128Register dst, Simd128Register src) { + vx(dst, src, src, Condition(0), Condition(0), Condition(0)); +} + +void TurboAssembler::S128AllOnes(Simd128Register dst, Simd128Register src) { + vceq(dst, src, src, Condition(0), Condition(3)); +} + +void TurboAssembler::S128Select(Simd128Register dst, Simd128Register src1, + Simd128Register src2, Simd128Register mask) { + vsel(dst, src1, src2, mask, Condition(0), Condition(0)); +} + +#define SIMD_UNOP_LIST_VRR_A(V) \ + V(F64x2Abs, vfpso, 2, 0, 3) \ + V(F64x2Neg, vfpso, 0, 0, 3) \ + V(F64x2Sqrt, vfsq, 0, 0, 3) \ + V(F64x2Ceil, vfi, 6, 0, 3) \ + V(F64x2Floor, vfi, 7, 0, 3) \ + V(F64x2Trunc, vfi, 5, 0, 3) \ + V(F64x2NearestInt, vfi, 4, 0, 3) \ + V(F32x4Abs, vfpso, 2, 0, 2) \ + V(F32x4Neg, vfpso, 0, 0, 2) \ + V(F32x4Sqrt, vfsq, 0, 0, 2) \ + V(F32x4Ceil, vfi, 6, 0, 2) \ + V(F32x4Floor, vfi, 7, 0, 2) \ + V(F32x4Trunc, vfi, 5, 0, 2) \ + V(F32x4NearestInt, vfi, 4, 0, 2) \ + V(I64x2Abs, vlp, 0, 0, 3) \ + V(I64x2Neg, vlc, 0, 0, 3) \ + V(I64x2SConvertI32x4Low, vupl, 0, 0, 2) \ + V(I64x2SConvertI32x4High, vuph, 0, 0, 2) \ + V(I64x2UConvertI32x4Low, vupll, 0, 0, 2) \ + V(I64x2UConvertI32x4High, vuplh, 0, 0, 2) \ + V(I32x4Abs, vlp, 0, 0, 2) \ + V(I32x4Neg, vlc, 0, 0, 2) \ + V(I32x4SConvertI16x8Low, vupl, 0, 0, 1) \ + V(I32x4SConvertI16x8High, vuph, 0, 0, 1) \ + V(I32x4UConvertI16x8Low, vupll, 0, 0, 1) \ + V(I32x4UConvertI16x8High, vuplh, 0, 0, 1) \ + V(I16x8Abs, vlp, 0, 0, 1) \ + V(I16x8Neg, vlc, 0, 0, 1) \ + V(I16x8SConvertI8x16Low, vupl, 0, 0, 0) \ + V(I16x8SConvertI8x16High, vuph, 0, 0, 0) \ + V(I16x8UConvertI8x16Low, vupll, 0, 0, 0) \ + V(I16x8UConvertI8x16High, vuplh, 0, 0, 0) \ + V(I8x16Abs, vlp, 0, 0, 0) \ + V(I8x16Neg, vlc, 0, 0, 0) \ + V(I8x16Popcnt, vpopct, 0, 0, 0) + +#define EMIT_SIMD_UNOP_VRR_A(name, op, c1, c2, c3) \ + void TurboAssembler::name(Simd128Register dst, Simd128Register src) { \ + op(dst, src, Condition(c1), Condition(c2), Condition(c3)); \ + } +SIMD_UNOP_LIST_VRR_A(EMIT_SIMD_UNOP_VRR_A) +#undef EMIT_SIMD_UNOP_VRR_A +#undef SIMD_UNOP_LIST_VRR_A + #define SIMD_BINOP_LIST_VRR_B(V) \ V(I64x2Eq, vceq, 0, 3) \ V(I64x2GtS, vch, 0, 3) \ @@ -5250,43 +5318,53 @@ SIMD_BINOP_LIST_VRR_B(EMIT_SIMD_BINOP_VRR_B) #undef EMIT_SIMD_BINOP_VRR_B #undef SIMD_BINOP_LIST_VRR_B -#define SIMD_BINOP_LIST_VRR_C(V) \ - V(F64x2Add, vfa, 0, 0, 3) \ - V(F64x2Sub, vfs, 0, 0, 3) \ - V(F64x2Mul, vfm, 0, 0, 3) \ - V(F64x2Div, vfd, 0, 0, 3) \ - V(F64x2Min, vfmin, 1, 0, 3) \ - V(F64x2Max, vfmax, 1, 0, 3) \ - V(F64x2Eq, vfce, 0, 0, 3) \ - V(F32x4Add, vfa, 0, 0, 2) \ - V(F32x4Sub, vfs, 0, 0, 2) \ - V(F32x4Mul, vfm, 0, 0, 2) \ - V(F32x4Div, vfd, 0, 0, 2) \ - V(F32x4Min, vfmin, 1, 0, 2) \ - V(F32x4Max, vfmax, 1, 0, 2) \ - V(F32x4Eq, vfce, 0, 0, 2) \ - V(I64x2Add, va, 0, 0, 3) \ - V(I64x2Sub, vs, 0, 0, 3) \ - V(I32x4Add, va, 0, 0, 2) \ - V(I32x4Sub, vs, 0, 0, 2) \ - V(I32x4Mul, vml, 0, 0, 2) \ - V(I32x4MinS, vmn, 0, 0, 2) \ - V(I32x4MinU, vmnl, 0, 0, 2) \ - V(I32x4MaxS, vmx, 0, 0, 2) \ - V(I32x4MaxU, vmxl, 0, 0, 2) \ - V(I16x8Add, va, 0, 0, 1) \ - V(I16x8Sub, vs, 0, 0, 1) \ - V(I16x8Mul, vml, 0, 0, 1) \ - V(I16x8MinS, vmn, 0, 0, 1) \ - V(I16x8MinU, vmnl, 0, 0, 1) \ - V(I16x8MaxS, vmx, 0, 0, 1) \ - V(I16x8MaxU, vmxl, 0, 0, 1) \ - V(I8x16Add, va, 0, 0, 0) \ - V(I8x16Sub, vs, 0, 0, 0) \ - V(I8x16MinS, vmn, 0, 0, 0) \ - V(I8x16MinU, vmnl, 0, 0, 0) \ - V(I8x16MaxS, vmx, 0, 0, 0) \ - V(I8x16MaxU, vmxl, 0, 0, 0) +#define SIMD_BINOP_LIST_VRR_C(V) \ + V(F64x2Add, vfa, 0, 0, 3) \ + V(F64x2Sub, vfs, 0, 0, 3) \ + V(F64x2Mul, vfm, 0, 0, 3) \ + V(F64x2Div, vfd, 0, 0, 3) \ + V(F64x2Min, vfmin, 1, 0, 3) \ + V(F64x2Max, vfmax, 1, 0, 3) \ + V(F64x2Eq, vfce, 0, 0, 3) \ + V(F64x2Pmin, vfmin, 3, 0, 3) \ + V(F64x2Pmax, vfmax, 3, 0, 3) \ + V(F32x4Add, vfa, 0, 0, 2) \ + V(F32x4Sub, vfs, 0, 0, 2) \ + V(F32x4Mul, vfm, 0, 0, 2) \ + V(F32x4Div, vfd, 0, 0, 2) \ + V(F32x4Min, vfmin, 1, 0, 2) \ + V(F32x4Max, vfmax, 1, 0, 2) \ + V(F32x4Eq, vfce, 0, 0, 2) \ + V(F32x4Pmin, vfmin, 3, 0, 2) \ + V(F32x4Pmax, vfmax, 3, 0, 2) \ + V(I64x2Add, va, 0, 0, 3) \ + V(I64x2Sub, vs, 0, 0, 3) \ + V(I32x4Add, va, 0, 0, 2) \ + V(I32x4Sub, vs, 0, 0, 2) \ + V(I32x4Mul, vml, 0, 0, 2) \ + V(I32x4MinS, vmn, 0, 0, 2) \ + V(I32x4MinU, vmnl, 0, 0, 2) \ + V(I32x4MaxS, vmx, 0, 0, 2) \ + V(I32x4MaxU, vmxl, 0, 0, 2) \ + V(I16x8Add, va, 0, 0, 1) \ + V(I16x8Sub, vs, 0, 0, 1) \ + V(I16x8Mul, vml, 0, 0, 1) \ + V(I16x8MinS, vmn, 0, 0, 1) \ + V(I16x8MinU, vmnl, 0, 0, 1) \ + V(I16x8MaxS, vmx, 0, 0, 1) \ + V(I16x8MaxU, vmxl, 0, 0, 1) \ + V(I16x8RoundingAverageU, vavgl, 0, 0, 1) \ + V(I8x16Add, va, 0, 0, 0) \ + V(I8x16Sub, vs, 0, 0, 0) \ + V(I8x16MinS, vmn, 0, 0, 0) \ + V(I8x16MinU, vmnl, 0, 0, 0) \ + V(I8x16MaxS, vmx, 0, 0, 0) \ + V(I8x16MaxU, vmxl, 0, 0, 0) \ + V(I8x16RoundingAverageU, vavgl, 0, 0, 0) \ + V(S128And, vn, 0, 0, 0) \ + V(S128Or, vo, 0, 0, 0) \ + V(S128Xor, vx, 0, 0, 0) \ + V(S128AndNot, vnc, 0, 0, 0) #define EMIT_SIMD_BINOP_VRR_C(name, op, c1, c2, c3) \ void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ @@ -5311,35 +5389,98 @@ SIMD_BINOP_LIST_VRR_C(EMIT_SIMD_BINOP_VRR_C) V(I8x16ShrS, vesrav, 0) \ V(I8x16ShrU, vesrlv, 0) -#define EMIT_SIMD_SHIFT(name, op, c1) \ - void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ - Register src2) { \ - vlvg(kScratchDoubleReg, src2, MemOperand(r0, 0), Condition(c1)); \ - vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0), Condition(c1)); \ - op(dst, src1, kScratchDoubleReg, Condition(0), Condition(0), \ - Condition(c1)); \ - } \ - void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ - const Operand& src2) { \ - mov(ip, src2); \ - name(dst, src1, ip); \ +#define EMIT_SIMD_SHIFT(name, op, c1) \ + void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ + Register src2, Simd128Register scratch) { \ + vlvg(scratch, src2, MemOperand(r0, 0), Condition(c1)); \ + vrep(scratch, scratch, Operand(0), Condition(c1)); \ + op(dst, src1, scratch, Condition(0), Condition(0), Condition(c1)); \ + } \ + void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ + const Operand& src2, Register scratch1, \ + Simd128Register scratch2) { \ + mov(scratch1, src2); \ + name(dst, src1, scratch1, scratch2); \ } SIMD_SHIFT_LIST(EMIT_SIMD_SHIFT) #undef EMIT_SIMD_SHIFT #undef SIMD_SHIFT_LIST +#define SIMD_EXT_MUL_LIST(V) \ + V(I64x2ExtMulLowI32x4S, vme, vmo, vmrl, 2) \ + V(I64x2ExtMulHighI32x4S, vme, vmo, vmrh, 2) \ + V(I64x2ExtMulLowI32x4U, vmle, vmlo, vmrl, 2) \ + V(I64x2ExtMulHighI32x4U, vmle, vmlo, vmrh, 2) \ + V(I32x4ExtMulLowI16x8S, vme, vmo, vmrl, 1) \ + V(I32x4ExtMulHighI16x8S, vme, vmo, vmrh, 1) \ + V(I32x4ExtMulLowI16x8U, vmle, vmlo, vmrl, 1) \ + V(I32x4ExtMulHighI16x8U, vmle, vmlo, vmrh, 1) \ + V(I16x8ExtMulLowI8x16S, vme, vmo, vmrl, 0) \ + V(I16x8ExtMulHighI8x16S, vme, vmo, vmrh, 0) \ + V(I16x8ExtMulLowI8x16U, vmle, vmlo, vmrl, 0) \ + V(I16x8ExtMulHighI8x16U, vmle, vmlo, vmrh, 0) + +#define EMIT_SIMD_EXT_MUL(name, mul_even, mul_odd, merge, mode) \ + void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ + Simd128Register src2, Simd128Register scratch) { \ + mul_even(scratch, src1, src2, Condition(0), Condition(0), \ + Condition(mode)); \ + mul_odd(dst, src1, src2, Condition(0), Condition(0), Condition(mode)); \ + merge(dst, scratch, dst, Condition(0), Condition(0), Condition(mode + 1)); \ + } +SIMD_EXT_MUL_LIST(EMIT_SIMD_EXT_MUL) +#undef EMIT_SIMD_EXT_MUL +#undef SIMD_EXT_MUL_LIST + +#define SIMD_ALL_TRUE_LIST(V) \ + V(I64x2AllTrue, 3) \ + V(I32x4AllTrue, 2) \ + V(I16x8AllTrue, 1) \ + V(I8x16AllTrue, 0) + +#define EMIT_SIMD_ALL_TRUE(name, mode) \ + void TurboAssembler::name(Register dst, Simd128Register src, \ + Register scratch1, Simd128Register scratch2) { \ + mov(scratch1, Operand(1)); \ + xgr(dst, dst); \ + vx(scratch2, scratch2, scratch2, Condition(0), Condition(0), \ + Condition(2)); \ + vceq(scratch2, src, scratch2, Condition(0), Condition(mode)); \ + vtm(scratch2, scratch2, Condition(0), Condition(0), Condition(0)); \ + locgr(Condition(8), dst, scratch1); \ + } +SIMD_ALL_TRUE_LIST(EMIT_SIMD_ALL_TRUE) +#undef EMIT_SIMD_ALL_TRUE +#undef SIMD_ALL_TRUE_LIST + +#define SIMD_QFM_LIST(V) \ + V(F64x2Qfma, vfma, 3) \ + V(F64x2Qfms, vfnms, 3) \ + V(F32x4Qfma, vfma, 2) \ + V(F32x4Qfms, vfnms, 2) + +#define EMIT_SIMD_QFM(name, op, c1) \ + void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \ + Simd128Register src2, Simd128Register src3) { \ + op(dst, src2, src3, src1, Condition(c1), Condition(0)); \ + } +SIMD_QFM_LIST(EMIT_SIMD_QFM) +#undef EMIT_SIMD_QFM +#undef SIMD_QFM_LIST + void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1, - Simd128Register src2) { - Register scratch_1 = r0; - Register scratch_2 = r1; + Simd128Register src2, Register scratch1, + Register scratch2, Register scratch3) { + Register scratch_1 = scratch1; + Register scratch_2 = scratch2; for (int i = 0; i < 2; i++) { vlgv(scratch_1, src1, MemOperand(r0, i), Condition(3)); vlgv(scratch_2, src2, MemOperand(r0, i), Condition(3)); MulS64(scratch_1, scratch_2); - scratch_1 = r1; - scratch_2 = ip; + scratch_1 = scratch2; + scratch_2 = scratch3; } - vlvgp(dst, r0, r1); + vlvgp(dst, scratch1, scratch2); } void TurboAssembler::F64x2Ne(Simd128Register dst, Simd128Register src1, @@ -5401,10 +5542,10 @@ void TurboAssembler::I32x4GeS(Simd128Register dst, Simd128Register src1, } void TurboAssembler::I32x4GeU(Simd128Register dst, Simd128Register src1, - Simd128Register src2) { - vceq(kScratchDoubleReg, src1, src2, Condition(0), Condition(2)); + Simd128Register src2, Simd128Register scratch) { + vceq(scratch, src1, src2, Condition(0), Condition(2)); vchl(dst, src1, src2, Condition(0), Condition(2)); - vo(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(2)); + vo(dst, dst, scratch, Condition(0), Condition(0), Condition(2)); } void TurboAssembler::I16x8Ne(Simd128Register dst, Simd128Register src1, @@ -5421,10 +5562,10 @@ void TurboAssembler::I16x8GeS(Simd128Register dst, Simd128Register src1, } void TurboAssembler::I16x8GeU(Simd128Register dst, Simd128Register src1, - Simd128Register src2) { - vceq(kScratchDoubleReg, src1, src2, Condition(0), Condition(1)); + Simd128Register src2, Simd128Register scratch) { + vceq(scratch, src1, src2, Condition(0), Condition(1)); vchl(dst, src1, src2, Condition(0), Condition(1)); - vo(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(1)); + vo(dst, dst, scratch, Condition(0), Condition(0), Condition(1)); } void TurboAssembler::I8x16Ne(Simd128Register dst, Simd128Register src1, @@ -5441,11 +5582,419 @@ void TurboAssembler::I8x16GeS(Simd128Register dst, Simd128Register src1, } void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1, - Simd128Register src2) { - vceq(kScratchDoubleReg, src1, src2, Condition(0), Condition(0)); + Simd128Register src2, Simd128Register scratch) { + vceq(scratch, src1, src2, Condition(0), Condition(0)); vchl(dst, src1, src2, Condition(0), Condition(0)); - vo(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0)); + vo(dst, dst, scratch, Condition(0), Condition(0), Condition(0)); +} + +void TurboAssembler::I64x2BitMask(Register dst, Simd128Register src, + Register scratch1, Simd128Register scratch2) { + mov(scratch1, Operand(0x8080808080800040)); + vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3)); + vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0)); + vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0)); +} + +void TurboAssembler::I32x4BitMask(Register dst, Simd128Register src, + Register scratch1, Simd128Register scratch2) { + mov(scratch1, Operand(0x8080808000204060)); + vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3)); + vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0)); + vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0)); +} + +void TurboAssembler::I16x8BitMask(Register dst, Simd128Register src, + Register scratch1, Simd128Register scratch2) { + mov(scratch1, Operand(0x10203040506070)); + vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3)); + vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0)); + vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0)); +} + +void TurboAssembler::F64x2ConvertLowI32x4S(Simd128Register dst, + Simd128Register src) { + vupl(dst, src, Condition(0), Condition(0), Condition(2)); + vcdg(dst, dst, Condition(4), Condition(0), Condition(3)); +} + +void TurboAssembler::F64x2ConvertLowI32x4U(Simd128Register dst, + Simd128Register src) { + vupll(dst, src, Condition(0), Condition(0), Condition(2)); + vcdlg(dst, dst, Condition(4), Condition(0), Condition(3)); +} + +void TurboAssembler::I8x16BitMask(Register dst, Simd128Register src, + Register scratch1, Register scratch2, + Simd128Register scratch3) { + mov(scratch1, Operand(0x4048505860687078)); + mov(scratch2, Operand(0x8101820283038)); + vlvgp(scratch3, scratch2, scratch1); + vbperm(scratch3, src, scratch3, Condition(0), Condition(0), Condition(0)); + vlgv(dst, scratch3, MemOperand(r0, 3), Condition(1)); +} + +void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src, + Register scratch) { + mov(dst, Operand(1)); + xgr(scratch, scratch); + vtm(src, src, Condition(0), Condition(0), Condition(0)); + locgr(Condition(8), dst, scratch); +} + +#define CONVERT_FLOAT_TO_INT32(convert, dst, src, scratch1, scratch2) \ + for (int index = 0; index < 4; index++) { \ + vlgv(scratch2, src, MemOperand(r0, index), Condition(2)); \ + MovIntToFloat(scratch1, scratch2); \ + convert(scratch2, scratch1, kRoundToZero); \ + vlvg(dst, scratch2, MemOperand(r0, index), Condition(2)); \ + } +void TurboAssembler::I32x4SConvertF32x4(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Register scratch2) { + // NaN to 0. + vfce(scratch1, src, src, Condition(0), Condition(0), Condition(2)); + vn(dst, src, scratch1, Condition(0), Condition(0), Condition(0)); + if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) { + vcgd(dst, dst, Condition(5), Condition(0), Condition(2)); + } else { + CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32, dst, dst, scratch1, scratch2) + } +} + +void TurboAssembler::I32x4UConvertF32x4(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Register scratch2) { + // vclgd or ConvertFloat32ToUnsignedInt32 will convert NaN to 0, negative to 0 + // automatically. + if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) { + vclgd(dst, src, Condition(5), Condition(0), Condition(2)); + } else { + CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32, dst, src, scratch1, + scratch2) + } } +#undef CONVERT_FLOAT_TO_INT32 + +#define CONVERT_INT32_TO_FLOAT(convert, dst, src, scratch1, scratch2) \ + for (int index = 0; index < 4; index++) { \ + vlgv(scratch2, src, MemOperand(r0, index), Condition(2)); \ + convert(scratch1, scratch2); \ + MovFloatToInt(scratch2, scratch1); \ + vlvg(dst, scratch2, MemOperand(r0, index), Condition(2)); \ + } +void TurboAssembler::F32x4SConvertI32x4(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Register scratch2) { + if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) { + vcdg(dst, src, Condition(4), Condition(0), Condition(2)); + } else { + CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, dst, src, scratch1, scratch2) + } +} +void TurboAssembler::F32x4UConvertI32x4(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Register scratch2) { + if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) { + vcdlg(dst, src, Condition(4), Condition(0), Condition(2)); + } else { + CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, dst, src, scratch1, + scratch2) + } +} +#undef CONVERT_INT32_TO_FLOAT + +void TurboAssembler::I16x8SConvertI32x4(Simd128Register dst, + Simd128Register src1, + Simd128Register src2) { + vpks(dst, src2, src1, Condition(0), Condition(2)); +} + +void TurboAssembler::I8x16SConvertI16x8(Simd128Register dst, + Simd128Register src1, + Simd128Register src2) { + vpks(dst, src2, src1, Condition(0), Condition(1)); +} + +#define VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, mode) \ + vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), \ + Condition(0), Condition(mode)); \ + vmx(scratch, src1, kDoubleRegZero, Condition(0), Condition(0), \ + Condition(mode)); \ + vmx(dst, src2, kDoubleRegZero, Condition(0), Condition(0), Condition(mode)); +void TurboAssembler::I16x8UConvertI32x4(Simd128Register dst, + Simd128Register src1, + Simd128Register src2, + Simd128Register scratch) { + // treat inputs as signed, and saturate to unsigned (negative to 0). + VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, 2) + vpkls(dst, dst, scratch, Condition(0), Condition(2)); +} + +void TurboAssembler::I8x16UConvertI16x8(Simd128Register dst, + Simd128Register src1, + Simd128Register src2, + Simd128Register scratch) { + // treat inputs as signed, and saturate to unsigned (negative to 0). + VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, 1) + vpkls(dst, dst, scratch, Condition(0), Condition(1)); +} +#undef VECTOR_PACK_UNSIGNED + +#define BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, op, extract_high, \ + extract_low, mode) \ + DCHECK(dst != scratch1 && dst != scratch2); \ + DCHECK(dst != src1 && dst != src2); \ + extract_high(scratch1, src1, Condition(0), Condition(0), Condition(mode)); \ + extract_high(scratch2, src2, Condition(0), Condition(0), Condition(mode)); \ + op(dst, scratch1, scratch2, Condition(0), Condition(0), \ + Condition(mode + 1)); \ + extract_low(scratch1, src1, Condition(0), Condition(0), Condition(mode)); \ + extract_low(scratch2, src2, Condition(0), Condition(0), Condition(mode)); \ + op(scratch1, scratch1, scratch2, Condition(0), Condition(0), \ + Condition(mode + 1)); +void TurboAssembler::I16x8AddSatS(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuph, vupl, 1) + vpks(dst, dst, scratch1, Condition(0), Condition(2)); +} + +void TurboAssembler::I16x8SubSatS(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuph, vupl, 1) + vpks(dst, dst, scratch1, Condition(0), Condition(2)); +} + +void TurboAssembler::I16x8AddSatU(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuplh, vupll, 1) + vpkls(dst, dst, scratch1, Condition(0), Condition(2)); +} + +void TurboAssembler::I16x8SubSatU(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuplh, vupll, 1) + // negative intermediate values to 0. + vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), Condition(0), + Condition(0)); + vmx(dst, kDoubleRegZero, dst, Condition(0), Condition(0), Condition(2)); + vmx(scratch1, kDoubleRegZero, scratch1, Condition(0), Condition(0), + Condition(2)); + vpkls(dst, dst, scratch1, Condition(0), Condition(2)); +} + +void TurboAssembler::I8x16AddSatS(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuph, vupl, 0) + vpks(dst, dst, scratch1, Condition(0), Condition(1)); +} + +void TurboAssembler::I8x16SubSatS(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuph, vupl, 0) + vpks(dst, dst, scratch1, Condition(0), Condition(1)); +} + +void TurboAssembler::I8x16AddSatU(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuplh, vupll, 0) + vpkls(dst, dst, scratch1, Condition(0), Condition(1)); +} + +void TurboAssembler::I8x16SubSatU(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2) { + BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuplh, vupll, 0) + // negative intermediate values to 0. + vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), Condition(0), + Condition(0)); + vmx(dst, kDoubleRegZero, dst, Condition(0), Condition(0), Condition(1)); + vmx(scratch1, kDoubleRegZero, scratch1, Condition(0), Condition(0), + Condition(1)); + vpkls(dst, dst, scratch1, Condition(0), Condition(1)); +} +#undef BINOP_EXTRACT + +void TurboAssembler::F64x2PromoteLowF32x4(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Register scratch2, Register scratch3, + Register scratch4) { + Register holder = scratch3; + for (int index = 0; index < 2; ++index) { + vlgv(scratch2, src, MemOperand(scratch2, index + 2), Condition(2)); + MovIntToFloat(scratch1, scratch2); + ldebr(scratch1, scratch1); + MovDoubleToInt64(holder, scratch1); + holder = scratch4; + } + vlvgp(dst, scratch3, scratch4); +} + +void TurboAssembler::F32x4DemoteF64x2Zero(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Register scratch2, Register scratch3, + Register scratch4) { + Register holder = scratch3; + for (int index = 0; index < 2; ++index) { + vlgv(scratch2, src, MemOperand(r0, index), Condition(3)); + MovInt64ToDouble(scratch1, scratch2); + ledbr(scratch1, scratch1); + MovFloatToInt(holder, scratch1); + holder = scratch4; + } + vx(dst, dst, dst, Condition(0), Condition(0), Condition(2)); + vlvg(dst, scratch3, MemOperand(r0, 2), Condition(2)); + vlvg(dst, scratch4, MemOperand(r0, 3), Condition(2)); +} + +#define EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, lane_size, mul_even, \ + mul_odd) \ + CHECK_NE(src, scratch2); \ + vrepi(scratch2, Operand(1), Condition(lane_size)); \ + mul_even(scratch1, src, scratch2, Condition(0), Condition(0), \ + Condition(lane_size)); \ + mul_odd(scratch2, src, scratch2, Condition(0), Condition(0), \ + Condition(lane_size)); \ + va(dst, scratch1, scratch2, Condition(0), Condition(0), \ + Condition(lane_size + 1)); +void TurboAssembler::I32x4ExtAddPairwiseI16x8S(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Simd128Register scratch2) { + EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 1, vme, vmo) +} + +void TurboAssembler::I32x4ExtAddPairwiseI16x8U(Simd128Register dst, + Simd128Register src, + Simd128Register scratch, + Simd128Register scratch2) { + vx(scratch, scratch, scratch, Condition(0), Condition(0), Condition(3)); + vsum(dst, src, scratch, Condition(0), Condition(0), Condition(1)); +} + +void TurboAssembler::I16x8ExtAddPairwiseI8x16S(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Simd128Register scratch2) { + EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vme, vmo) +} + +void TurboAssembler::I16x8ExtAddPairwiseI8x16U(Simd128Register dst, + Simd128Register src, + Simd128Register scratch1, + Simd128Register scratch2) { + EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vmle, vmlo) +} +#undef EXT_ADD_PAIRWISE + +void TurboAssembler::I32x4TruncSatF64x2SZero(Simd128Register dst, + Simd128Register src, + Simd128Register scratch) { + // NaN to 0. + vlr(scratch, src, Condition(0), Condition(0), Condition(0)); + vfce(scratch, scratch, scratch, Condition(0), Condition(0), Condition(3)); + vn(scratch, src, scratch, Condition(0), Condition(0), Condition(0)); + vcgd(scratch, scratch, Condition(5), Condition(0), Condition(3)); + vx(dst, dst, dst, Condition(0), Condition(0), Condition(2)); + vpks(dst, dst, scratch, Condition(0), Condition(3)); +} + +void TurboAssembler::I32x4TruncSatF64x2UZero(Simd128Register dst, + Simd128Register src, + Simd128Register scratch) { + vclgd(scratch, src, Condition(5), Condition(0), Condition(3)); + vx(dst, dst, dst, Condition(0), Condition(0), Condition(2)); + vpkls(dst, dst, scratch, Condition(0), Condition(3)); +} + +void TurboAssembler::S128Const(Simd128Register dst, uint64_t high, uint64_t low, + Register scratch1, Register scratch2) { + mov(scratch1, Operand(low)); + mov(scratch2, Operand(high)); + vlvgp(dst, scratch2, scratch1); +} + +void TurboAssembler::I8x16Swizzle(Simd128Register dst, Simd128Register src1, + Simd128Register src2, Register scratch1, + Register scratch2, Simd128Register scratch3, + Simd128Register scratch4) { + DCHECK(!AreAliased(src1, src2, scratch3, scratch4)); + // Saturate the indices to 5 bits. Input indices more than 31 should + // return 0. + vrepi(scratch3, Operand(31), Condition(0)); + vmnl(scratch4, src2, scratch3, Condition(0), Condition(0), Condition(0)); + // Input needs to be reversed. + vlgv(scratch1, src1, MemOperand(r0, 0), Condition(3)); + vlgv(scratch2, src1, MemOperand(r0, 1), Condition(3)); + lrvgr(scratch1, scratch1); + lrvgr(scratch2, scratch2); + vlvgp(dst, scratch2, scratch1); + // Clear scratch. + vx(scratch3, scratch3, scratch3, Condition(0), Condition(0), Condition(0)); + vperm(dst, dst, scratch3, scratch4, Condition(0), Condition(0)); +} + +void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1, + Simd128Register src2, uint64_t high, + uint64_t low, Register scratch1, + Register scratch2, Simd128Register scratch3) { + mov(scratch1, Operand(low)); + mov(scratch2, Operand(high)); + vlvgp(scratch3, scratch2, scratch1); + vperm(dst, src1, src2, scratch3, Condition(0), Condition(0)); +} + +void TurboAssembler::I32x4DotI16x8S(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch) { + vme(scratch, src1, src2, Condition(0), Condition(0), Condition(1)); + vmo(dst, src1, src2, Condition(0), Condition(0), Condition(1)); + va(dst, scratch, dst, Condition(0), Condition(0), Condition(2)); +} + +#define Q15_MUL_ROAUND(accumulator, src1, src2, const_val, scratch, unpack) \ + unpack(scratch, src1, Condition(0), Condition(0), Condition(1)); \ + unpack(accumulator, src2, Condition(0), Condition(0), Condition(1)); \ + vml(accumulator, scratch, accumulator, Condition(0), Condition(0), \ + Condition(2)); \ + va(accumulator, accumulator, const_val, Condition(0), Condition(0), \ + Condition(2)); \ + vrepi(scratch, Operand(15), Condition(2)); \ + vesrav(accumulator, accumulator, scratch, Condition(0), Condition(0), \ + Condition(2)); +void TurboAssembler::I16x8Q15MulRSatS(Simd128Register dst, Simd128Register src1, + Simd128Register src2, + Simd128Register scratch1, + Simd128Register scratch2, + Simd128Register scratch3) { + DCHECK(!AreAliased(src1, src2, scratch1, scratch2, scratch3)); + vrepi(scratch1, Operand(0x4000), Condition(2)); + Q15_MUL_ROAUND(scratch2, src1, src2, scratch1, scratch3, vupl) + Q15_MUL_ROAUND(dst, src1, src2, scratch1, scratch3, vuph) + vpks(dst, dst, scratch2, Condition(0), Condition(2)); +} +#undef Q15_MUL_ROAUND // Vector LE Load and Transform instructions. #ifdef V8_TARGET_BIG_ENDIAN @@ -5463,16 +6012,16 @@ void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1, V(16x8, vlbrrep, LoadU16LE, 1) \ V(8x16, vlrep, LoadU8, 0) -#define LOAD_SPLAT(name, vector_instr, scalar_instr, condition) \ - void TurboAssembler::LoadAndSplat##name##LE(Simd128Register dst, \ - const MemOperand& mem) { \ - if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ - vector_instr(dst, mem, Condition(condition)); \ - return; \ - } \ - scalar_instr(r1, mem); \ - vlvg(dst, r1, MemOperand(r0, 0), Condition(condition)); \ - vrep(dst, dst, Operand(0), Condition(condition)); \ +#define LOAD_SPLAT(name, vector_instr, scalar_instr, condition) \ + void TurboAssembler::LoadAndSplat##name##LE( \ + Simd128Register dst, const MemOperand& mem, Register scratch) { \ + if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ + vector_instr(dst, mem, Condition(condition)); \ + return; \ + } \ + scalar_instr(scratch, mem); \ + vlvg(dst, scratch, MemOperand(r0, 0), Condition(condition)); \ + vrep(dst, dst, Operand(0), Condition(condition)); \ } LOAD_SPLAT_LIST(LOAD_SPLAT) #undef LOAD_SPLAT @@ -5486,40 +6035,41 @@ LOAD_SPLAT_LIST(LOAD_SPLAT) V(8x8U, vuplh, 0) \ V(8x8S, vuph, 0) -#define LOAD_EXTEND(name, unpack_instr, condition) \ - void TurboAssembler::LoadAndExtend##name##LE(Simd128Register dst, \ - const MemOperand& mem) { \ - if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ - vlebrg(kScratchDoubleReg, mem, Condition(0)); \ - } else { \ - LoadU64LE(r1, mem); \ - vlvg(kScratchDoubleReg, r1, MemOperand(r0, 0), Condition(3)); \ - } \ - unpack_instr(dst, kScratchDoubleReg, Condition(0), Condition(0), \ - Condition(condition)); \ +#define LOAD_EXTEND(name, unpack_instr, condition) \ + void TurboAssembler::LoadAndExtend##name##LE( \ + Simd128Register dst, const MemOperand& mem, Register scratch) { \ + if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ + vlebrg(dst, mem, Condition(0)); \ + } else { \ + LoadU64LE(scratch, mem); \ + vlvg(dst, scratch, MemOperand(r0, 0), Condition(3)); \ + } \ + unpack_instr(dst, dst, Condition(0), Condition(0), Condition(condition)); \ } LOAD_EXTEND_LIST(LOAD_EXTEND) #undef LOAD_EXTEND #undef LOAD_EXTEND -void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem) { +void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem, + Register scratch) { vx(dst, dst, dst, Condition(0), Condition(0), Condition(0)); if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { vlebrf(dst, mem, Condition(3)); return; } - LoadU32LE(r1, mem); - vlvg(dst, r1, MemOperand(r0, 3), Condition(2)); + LoadU32LE(scratch, mem); + vlvg(dst, scratch, MemOperand(r0, 3), Condition(2)); } -void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) { +void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem, + Register scratch) { vx(dst, dst, dst, Condition(0), Condition(0), Condition(0)); if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { vlebrg(dst, mem, Condition(1)); return; } - LoadU64LE(r1, mem); - vlvg(dst, r1, MemOperand(r0, 1), Condition(3)); + LoadU64LE(scratch, mem); + vlvg(dst, scratch, MemOperand(r0, 1), Condition(3)); } #define LOAD_LANE_LIST(V) \ @@ -5528,15 +6078,16 @@ void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem) { V(16, vlebrh, LoadU16LE, 1) \ V(8, vleb, LoadU8, 0) -#define LOAD_LANE(name, vector_instr, scalar_instr, condition) \ - void TurboAssembler::LoadLane##name##LE(Simd128Register dst, \ - const MemOperand& mem, int lane) { \ - if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ - vector_instr(dst, mem, Condition(lane)); \ - return; \ - } \ - scalar_instr(r1, mem); \ - vlvg(dst, r1, MemOperand(r0, lane), Condition(condition)); \ +#define LOAD_LANE(name, vector_instr, scalar_instr, condition) \ + void TurboAssembler::LoadLane##name##LE(Simd128Register dst, \ + const MemOperand& mem, int lane, \ + Register scratch) { \ + if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ + vector_instr(dst, mem, Condition(lane)); \ + return; \ + } \ + scalar_instr(scratch, mem); \ + vlvg(dst, scratch, MemOperand(r0, lane), Condition(condition)); \ } LOAD_LANE_LIST(LOAD_LANE) #undef LOAD_LANE @@ -5548,15 +6099,16 @@ LOAD_LANE_LIST(LOAD_LANE) V(16, vstebrh, StoreU16LE, 1) \ V(8, vsteb, StoreU8, 0) -#define STORE_LANE(name, vector_instr, scalar_instr, condition) \ - void TurboAssembler::StoreLane##name##LE(Simd128Register src, \ - const MemOperand& mem, int lane) { \ - if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ - vector_instr(src, mem, Condition(lane)); \ - return; \ - } \ - vlgv(r1, src, MemOperand(r0, lane), Condition(condition)); \ - scalar_instr(r1, mem); \ +#define STORE_LANE(name, vector_instr, scalar_instr, condition) \ + void TurboAssembler::StoreLane##name##LE(Simd128Register src, \ + const MemOperand& mem, int lane, \ + Register scratch) { \ + if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) { \ + vector_instr(src, mem, Condition(lane)); \ + return; \ + } \ + vlgv(scratch, src, MemOperand(r0, lane), Condition(condition)); \ + scalar_instr(scratch, mem); \ } STORE_LANE_LIST(STORE_LANE) #undef STORE_LANE @@ -5580,8 +6132,6 @@ void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) { LoadU64(destination, MemOperand(kRootRegister, offset)); } -#undef kScratchDoubleReg - } // namespace internal } // namespace v8 |