Diffstat (limited to 'deps/v8/src/codegen/riscv/assembler-riscv.cc')
-rw-r--r--  deps/v8/src/codegen/riscv/assembler-riscv.cc | 498
1 file changed, 360 insertions(+), 138 deletions(-)
diff --git a/deps/v8/src/codegen/riscv/assembler-riscv.cc b/deps/v8/src/codegen/riscv/assembler-riscv.cc
index f8c2e33051..b6844a6f37 100644
--- a/deps/v8/src/codegen/riscv/assembler-riscv.cc
+++ b/deps/v8/src/codegen/riscv/assembler-riscv.cc
@@ -34,6 +34,7 @@
 
 #include "src/codegen/riscv/assembler-riscv.h"
 
+#include "src/base/bits.h"
 #include "src/base/cpu.h"
 #include "src/codegen/assembler-inl.h"
 #include "src/codegen/safepoint-table.h"
@@ -132,10 +133,8 @@ Register ToRegister(int num) {
 
 const int RelocInfo::kApplyMask =
     RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE) |
-    RelocInfo::ModeMask(RelocInfo::NEAR_BUILTIN_ENTRY) |
     RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE_ENCODED) |
-    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET) |
-    RelocInfo::ModeMask(RelocInfo::CODE_TARGET);
+    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);
 
 bool RelocInfo::IsCodedSpecially() {
   // The deserializer needs to know whether a pointer is specially coded. Being
@@ -225,13 +224,13 @@ void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                         SafepointTableBuilder* safepoint_table_builder,
                         int handler_table_offset) {
   // As a crutch to avoid having to add manual Align calls wherever we use a
-  // raw workflow to create Code objects (mostly in tests), add another Align
-  // call here. It does no harm - the end of the Code object is aligned to the
-  // (larger) kCodeAlignment anyways.
+  // raw workflow to create InstructionStream objects (mostly in tests), add
+  // another Align call here. It does no harm - the end of the InstructionStream
+  // object is aligned to the (larger) kCodeAlignment anyways.
   // TODO(jgruber): Consider moving responsibility for proper alignment to
   // metadata table builders (safepoint, handler, constant pool, code
   // comments).
-  DataAlign(Code::kMetadataAlignment);
+  DataAlign(InstructionStream::kMetadataAlignment);
 
   ForceConstantPoolEmissionWithoutJump();
 
@@ -564,8 +563,10 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal,
     } break;
    default: {  // Emitted label constant, not part of a branch.
-      // Make label relative to Code pointer of generated Code object.
-      instr_at_put(pos, target_pos + (Code::kHeaderSize - kHeapObjectTag));
+      // Make label relative to Code pointer of generated InstructionStream
+      // object.
+      instr_at_put(
+          pos, target_pos + (InstructionStream::kHeaderSize - kHeapObjectTag));
    } break;
  }
  disassembleInstr(instr);
@@ -839,7 +840,8 @@ void Assembler::label_at_put(Label* L, int at_offset) {
       reinterpret_cast<Instr*>(buffer_start_ + at_offset), at_offset);
   if (L->is_bound()) {
     target_pos = L->pos();
-    instr_at_put(at_offset, target_pos + (Code::kHeaderSize - kHeapObjectTag));
+    instr_at_put(at_offset, target_pos + (InstructionStream::kHeaderSize -
+                                          kHeapObjectTag));
   } else {
     if (L->is_linked()) {
       target_pos = L->pos();  // L's link.
@@ -884,8 +886,29 @@ void Assembler::EBREAK() {
 
 void Assembler::nop() { addi(ToRegister(0), ToRegister(0), 0); }
 
+inline int64_t signExtend(uint64_t V, int N) {
+  return int64_t(V << (64 - N)) >> (64 - N);
+}
+
 #if V8_TARGET_ARCH_RISCV64
 void Assembler::RV_li(Register rd, int64_t imm) {
+  UseScratchRegisterScope temps(this);
+  if (RecursiveLiCount(imm) > GeneralLiCount(imm, temps.hasAvailable())) {
+    GeneralLi(rd, imm);
+  } else {
+    RecursiveLi(rd, imm);
+  }
+}
+
+int Assembler::RV_li_count(int64_t imm, bool is_get_temp_reg) {
+  if (RecursiveLiCount(imm) > GeneralLiCount(imm, is_get_temp_reg)) {
+    return GeneralLiCount(imm, is_get_temp_reg);
+  } else {
+    return RecursiveLiCount(imm);
+  }
+}
+
+void Assembler::GeneralLi(Register rd, int64_t imm) {
   // 64-bit imm is put in the register rd.
   // In most cases the imm is 32 bit and 2 instructions are generated. If a
   // temporary register is available, in the worst case, 6 instructions are
@@ -913,6 +936,7 @@ void Assembler::RV_li(Register rd, int64_t imm) {
     }
     return;
   } else {
+    UseScratchRegisterScope temps(this);
     // 64-bit case: divide imm into two 32-bit parts, upper and lower
     int64_t up_32 = imm >> 32;
     int64_t low_32 = imm & 0xffffffffull;
@@ -921,7 +945,6 @@ void Assembler::RV_li(Register rd, int64_t imm) {
     if (up_32 == 0 || low_32 == 0) {
       // No temp register is needed
     } else {
-      UseScratchRegisterScope temps(this);
       BlockTrampolinePoolScope block_trampoline_pool(this);
       temp_reg = temps.hasAvailable() ? temps.Acquire() : no_reg;
     }
@@ -1037,129 +1060,6 @@ void Assembler::RV_li(Register rd, int64_t imm) {
   }
 }
 
-int Assembler::li_estimate(int64_t imm, bool is_get_temp_reg) {
-  int count = 0;
-  // imitate Assembler::RV_li
-  if (is_int32(imm + 0x800)) {
-    // 32-bit case. Maximum of 2 instructions generated
-    int64_t high_20 = ((imm + 0x800) >> 12);
-    int64_t low_12 = imm << 52 >> 52;
-    if (high_20) {
-      count++;
-      if (low_12) {
-        count++;
-      }
-    } else {
-      count++;
-    }
-    return count;
-  } else {
-    // 64-bit case: divide imm into two 32-bit parts, upper and lower
-    int64_t up_32 = imm >> 32;
-    int64_t low_32 = imm & 0xffffffffull;
-    // Check if a temporary register is available
-    if (is_get_temp_reg) {
-      // keep track of hardware behavior for lower part in sim_low
-      int64_t sim_low = 0;
-      // Build lower part
-      if (low_32 != 0) {
-        int64_t high_20 = ((low_32 + 0x800) >> 12);
-        int64_t low_12 = low_32 & 0xfff;
-        if (high_20) {
-          // Adjust to 20 bits for the case of overflow
-          high_20 &= 0xfffff;
-          sim_low = ((high_20 << 12) << 32) >> 32;
-          count++;
-          if (low_12) {
-            sim_low += (low_12 << 52 >> 52) | low_12;
-            count++;
-          }
-        } else {
-          sim_low = low_12;
-          count++;
-        }
-      }
-      if (sim_low & 0x100000000) {
-        // Bit 31 is 1. Either an overflow or a negative 64 bit
-        if (up_32 == 0) {
-          // Positive number, but overflow because of the add 0x800
-          count++;
-          count++;
-          return count;
-        }
-        // low_32 is a negative 64 bit after the build
-        up_32 = (up_32 - 0xffffffff) & 0xffffffff;
-      }
-      if (up_32 == 0) {
-        return count;
-      }
-      int64_t high_20 = (up_32 + 0x800) >> 12;
-      int64_t low_12 = up_32 & 0xfff;
-      if (high_20) {
-        // Adjust to 20 bits for the case of overflow
-        high_20 &= 0xfffff;
-        count++;
-        if (low_12) {
-          count++;
-        }
-      } else {
-        count++;
-      }
-      // Put it at the bgining of register
-      count++;
-      if (low_32 != 0) {
-        count++;
-      }
-      return count;
-    }
-    // No temp register. Build imm in rd.
-    // Build upper 32 bits first in rd. Divide lower 32 bits parts and add
-    // parts to the upper part by doing shift and add.
-    // First build upper part in rd.
-    int64_t high_20 = (up_32 + 0x800) >> 12;
-    int64_t low_12 = up_32 & 0xfff;
-    if (high_20) {
-      // Adjust to 20 bits for the case of overflow
-      high_20 &= 0xfffff;
-      count++;
-      if (low_12) {
-        count++;
-      }
-    } else {
-      count++;
-    }
-    // upper part already in rd. Each part to be added to rd, has maximum of 11
-    // bits, and always starts with a 1. rd is shifted by the size of the part
-    // plus the number of zeros between the parts. Each part is added after the
-    // left shift.
-    uint32_t mask = 0x80000000;
-    int32_t i;
-    for (i = 0; i < 32; i++) {
-      if ((low_32 & mask) == 0) {
-        mask >>= 1;
-        if (i == 31) {
-          // rest is zero
-          count++;
-        }
-        continue;
-      }
-      // The first 1 seen
-      if ((i + 11) < 32) {
-        // Pick 11 bits
-        count++;
-        count++;
-        i += 10;
-        mask >>= 11;
-      } else {
-        count++;
-        count++;
-        break;
-      }
-    }
-  }
-  return count;
-}
-
 void Assembler::li_ptr(Register rd, int64_t imm) {
   // Initialize rd with an address
   // Pointers are 48 bits
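The +0x800 rounding that both RV_li and the estimator lean on can be checked in isolation. A minimal standalone sketch, not part of the patch (the harness and names are ours): it verifies that the LUI operand computed from imm + 0x800 exactly cancels the borrow introduced by ADDI's sign-extended low part.

#include <cassert>
#include <cstdint>

// When bit 11 of imm is set, the sign-extended ADDI operand contributes
// (imm & 0xfff) - 0x1000; deriving the high part from imm + 0x800 absorbs
// exactly that -0x1000 borrow, so LUI + ADDI reproduces imm.
int main() {
  for (int64_t imm = -(INT64_C(1) << 35); imm <= (INT64_C(1) << 35);
       imm += 0x7ff) {
    int64_t hi20 = (imm + 0x800) >> 12;  // LUI operand, before 20-bit masking
    int64_t lo12 = static_cast<int64_t>(static_cast<uint64_t>(imm) << 52) >>
                   52;  // sign-extended low 12 bits, as in `imm << 52 >> 52`
    assert((hi20 << 12) + lo12 == imm);
  }
  // RV_li takes the 2-instruction path only while is_int32(imm + 0x800)
  // holds, i.e. while hi20 still fits LUI's 20-bit immediate field.
  return 0;
}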
@@ -1207,7 +1107,7 @@ void Assembler::RV_li(Register rd, int32_t imm) {
   }
 }
 
-int Assembler::li_estimate(int32_t imm, bool is_get_temp_reg) {
+int Assembler::RV_li_count(int32_t imm, bool is_get_temp_reg) {
   int count = 0;
   // imitate Assembler::RV_li
   int32_t high_20 = ((imm + 0x800) >> 12);
@@ -1250,8 +1150,8 @@ void Assembler::break_(uint32_t code, bool break_as_stop) {
   // simulator expects a char pointer after the stop instruction.
   // See constants-mips.h for explanation.
   DCHECK(
-      (break_as_stop && code <= kMaxStopCode && code > kMaxWatchpointCode) ||
-      (!break_as_stop && (code > kMaxStopCode || code <= kMaxWatchpointCode)));
+      (break_as_stop && code <= kMaxStopCode && code > kMaxTracepointCode) ||
+      (!break_as_stop && (code > kMaxStopCode || code <= kMaxTracepointCode)));
 
   // since ebreak does not allow additional immediate field, we use the
   // immediate field of lui instruction immediately following the ebreak to
@@ -1451,7 +1351,8 @@ void Assembler::dd(Label* label) {
 void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
   if (!ShouldRecordRelocInfo(rmode)) return;
   // We do not try to reuse pool constants.
-  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
+  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code(),
+                  InstructionStream());
   DCHECK_GE(buffer_space(), kMaxRelocSize);  // Too late to grow buffer here.
   reloc_info_writer.Write(&rinfo);
 }
@@ -1909,5 +1810,326 @@ const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
 const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
 const size_t ConstantPool::kApproxMaxEntryCount = 512;
 
+#if defined(V8_TARGET_ARCH_RISCV64)
+// LLVM Code
+//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++
+//-*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
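The LLVM-derived routines below rely on the signExtend helper added near RV_li earlier in this patch. A quick standalone check of the shift trick (our file, not V8's; it assumes the usual arithmetic right shift of signed values, which the patch assumes as well):

#include <cassert>
#include <cstdint>

// signExtend(V, N) moves the low N bits to the top of the 64-bit word and
// arithmetic-shifts them back, replicating bit N-1 through all upper bits.
int64_t signExtend(uint64_t V, int N) {
  return int64_t(V << (64 - N)) >> (64 - N);
}

int main() {
  assert(signExtend(0x7ff, 12) == 0x7ff);  // sign bit clear: value unchanged
  assert(signExtend(0xfff, 12) == -1);     // sign bit set: ones fill the top
  assert(signExtend(0x800, 12) == -0x800);
  assert(signExtend(0x80000000u, 32) == -(INT64_C(1) << 31));
  return 0;
}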
+void Assembler::RecursiveLi(Register rd, int64_t val) {
+  if (val > 0 && RecursiveLiImplCount(val) > 2) {
+    unsigned LeadingZeros = base::bits::CountLeadingZeros((uint64_t)val);
+    uint64_t ShiftedVal = (uint64_t)val << LeadingZeros;
+    int countFillZero = RecursiveLiImplCount(ShiftedVal) + 1;
+    if (countFillZero < RecursiveLiImplCount(val)) {
+      RecursiveLiImpl(rd, ShiftedVal);
+      srli(rd, rd, LeadingZeros);
+      return;
+    }
+  }
+  RecursiveLiImpl(rd, val);
+}
+
+int Assembler::RecursiveLiCount(int64_t val) {
+  if (val > 0 && RecursiveLiImplCount(val) > 2) {
+    unsigned LeadingZeros = base::bits::CountLeadingZeros((uint64_t)val);
+    uint64_t ShiftedVal = (uint64_t)val << LeadingZeros;
+    // Fill in the bits that will be shifted out with 1s. An example where
+    // this helps is trailing one masks with 32 or more ones. This will
+    // generate ADDI -1 and an SRLI.
+    int countFillZero = RecursiveLiImplCount(ShiftedVal) + 1;
+    if (countFillZero < RecursiveLiImplCount(val)) {
+      return countFillZero;
+    }
+  }
+  return RecursiveLiImplCount(val);
+}
+
+void Assembler::RecursiveLiImpl(Register rd, int64_t Val) {
+  if (is_int32(Val)) {
+    // Depending on the active bits in the immediate Value v, the following
+    // instruction sequences are emitted:
+    //
+    // v == 0                        : ADDI
+    // v[0,12) != 0 && v[12,32) == 0 : ADDI
+    // v[0,12) == 0 && v[12,32) != 0 : LUI
+    // v[0,32) != 0                  : LUI+ADDI(W)
+    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+    int64_t Lo12 = Val << 52 >> 52;
+
+    if (Hi20) {
+      lui(rd, (int32_t)Hi20);
+    }
+
+    if (Lo12 || Hi20 == 0) {
+      if (Hi20) {
+        addiw(rd, rd, Lo12);
+      } else {
+        addi(rd, zero_reg, Lo12);
+      }
+    }
+    return;
+  }
+
+  // In the worst case, for a full 64-bit constant, a sequence of 8
+  // instructions (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be
+  // emitted. Note that the first two instructions (LUI+ADDIW) can contribute
+  // up to 32 bits while the following ADDI instructions contribute up to 12
+  // bits each.
+  //
+  // On the first glance, implementing this seems to be possible by simply
+  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many
+  // left shift (SLLI) and immediate additions (ADDI) as needed. However, due
+  // to the fact that ADDI performs a sign extended addition, doing it like
+  // that would only be possible when at most 11 bits of the ADDI instructions
+  // are used. Using all 12 bits of the ADDI instructions, like done by GAS,
+  // actually requires that the constant is processed starting with the least
+  // significant bit.
+  //
+  // In the following, constants are processed from LSB to MSB but instruction
+  // emission is performed from MSB to LSB by recursively calling
+  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+  // from the constant and the optimal shift amount, which can be greater than
+  // 12 bits if the constant is sparse, is determined. Then, the shifted
+  // remaining constant is processed recursively and gets emitted as soon as
+  // it fits into 32 bits. The emission of the shifts and additions is
+  // subsequently performed when the recursion returns.
+
+  int64_t Lo12 = Val << 52 >> 52;
+  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
+  int ShiftAmount = 12 + base::bits::CountTrailingZeros((uint64_t)Hi52);
+  Hi52 = signExtend(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+  // If the remaining bits don't fit in 12 bits, we might be able to reduce
+  // the shift amount in order to use LUI which will zero the lower 12 bits.
+  bool Unsigned = false;
+  if (ShiftAmount > 12 && !is_int12(Hi52)) {
+    if (is_int32((uint64_t)Hi52 << 12)) {
+      // Reduce the shift amount and add zeros to the LSBs so it will match
+      // LUI.
+      ShiftAmount -= 12;
+      Hi52 = (uint64_t)Hi52 << 12;
+    }
+  }
+  RecursiveLi(rd, Hi52);
+
+  if (Unsigned) {
+  } else {
+    slli(rd, rd, ShiftAmount);
+  }
+  if (Lo12) {
+    addi(rd, rd, Lo12);
+  }
+}
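RecursiveLiImpl transliterates LLVM's generateInstSeq. A compact standalone model of the same recursion, under stated assumptions (our names; it records (op, imm) pairs and replays them with RV64 semantics instead of emitting instructions, omits the LUI shift-reduction branch since that is only an optimization, and needs C++20 for std::countr_zero), shows the LSB-first decomposition round-tripping sample constants in at most 8 instructions:

#include <bit>
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

struct Inst {
  std::string op;
  int64_t imm;
};

int64_t SignExtend(uint64_t v, int n) {
  return static_cast<int64_t>(v << (64 - n)) >> (64 - n);
}

// Constants are processed LSB-first; emission comes out MSB-first because
// the remainder is recursed on before the slli/addi for the stripped bits.
void GenerateInstSeq(int64_t val, std::vector<Inst>* seq) {
  if (val >= INT32_MIN && val <= INT32_MAX) {
    int64_t hi20 = ((val + 0x800) >> 12) & 0xfffff;
    int64_t lo12 = SignExtend(static_cast<uint64_t>(val), 12);
    if (hi20) seq->push_back({"lui", hi20});
    if (lo12 || hi20 == 0) seq->push_back({hi20 ? "addiw" : "addi", lo12});
    return;
  }
  int64_t lo12 = SignExtend(static_cast<uint64_t>(val), 12);
  uint64_t hi52 = (static_cast<uint64_t>(val) + 0x800) >> 12;
  int shift = 12 + std::countr_zero(hi52);  // > 12 if the constant is sparse
  int64_t rest = SignExtend(hi52 >> (shift - 12), 64 - shift);
  GenerateInstSeq(rest, seq);
  seq->push_back({"slli", shift});
  if (lo12) seq->push_back({"addi", lo12});
}

// Replay with RV64 semantics: LUI and ADDIW sign-extend their 32-bit result.
uint64_t Replay(const std::vector<Inst>& seq) {
  uint64_t rd = 0;
  for (const Inst& i : seq) {
    uint64_t imm = static_cast<uint64_t>(i.imm);
    if (i.op == "lui") rd = SignExtend(imm << 12, 32);
    if (i.op == "addi") rd += imm;  // wraps mod 2^64
    if (i.op == "addiw") rd = SignExtend(rd + imm, 32);
    if (i.op == "slli") rd <<= i.imm;
  }
  return rd;
}

int main() {
  for (int64_t v : {INT64_C(0x12345678) << 16, -INT64_C(0x123456789ab),
                    INT64_C(0x00007fffffff0001), INT64_C(1) << 62}) {
    std::vector<Inst> seq;
    GenerateInstSeq(v, &seq);
    assert(seq.size() <= 8);  // worst case: lui+addiw+3*(slli+addi)
    assert(Replay(seq) == static_cast<uint64_t>(v));
  }
  return 0;
}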
+
+int Assembler::RecursiveLiImplCount(int64_t Val) {
+  int count = 0;
+  if (is_int32(Val)) {
+    // Depending on the active bits in the immediate Value v, the following
+    // instruction sequences are emitted:
+    //
+    // v == 0                        : ADDI
+    // v[0,12) != 0 && v[12,32) == 0 : ADDI
+    // v[0,12) == 0 && v[12,32) != 0 : LUI
+    // v[0,32) != 0                  : LUI+ADDI(W)
+    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+    int64_t Lo12 = Val << 52 >> 52;
+
+    if (Hi20) {
+      // lui(rd, (int32_t)Hi20);
+      count++;
+    }
+
+    if (Lo12 || Hi20 == 0) {
+      // unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+      // Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
+      count++;
+    }
+    return count;
+  }
+
+  // In the worst case, for a full 64-bit constant, a sequence of 8
+  // instructions (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be
+  // emitted. Note that the first two instructions (LUI+ADDIW) can contribute
+  // up to 32 bits while the following ADDI instructions contribute up to 12
+  // bits each.
+  //
+  // On the first glance, implementing this seems to be possible by simply
+  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many
+  // left shift (SLLI) and immediate additions (ADDI) as needed. However, due
+  // to the fact that ADDI performs a sign extended addition, doing it like
+  // that would only be possible when at most 11 bits of the ADDI instructions
+  // are used. Using all 12 bits of the ADDI instructions, like done by GAS,
+  // actually requires that the constant is processed starting with the least
+  // significant bit.
+  //
+  // In the following, constants are processed from LSB to MSB but instruction
+  // emission is performed from MSB to LSB by recursively calling
+  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+  // from the constant and the optimal shift amount, which can be greater than
+  // 12 bits if the constant is sparse, is determined. Then, the shifted
+  // remaining constant is processed recursively and gets emitted as soon as
+  // it fits into 32 bits. The emission of the shifts and additions is
+  // subsequently performed when the recursion returns.
+
+  int64_t Lo12 = Val << 52 >> 52;
+  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
+  int ShiftAmount = 12 + base::bits::CountTrailingZeros((uint64_t)Hi52);
+  Hi52 = signExtend(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+  // If the remaining bits don't fit in 12 bits, we might be able to reduce
+  // the shift amount in order to use LUI which will zero the lower 12 bits.
+  bool Unsigned = false;
+  if (ShiftAmount > 12 && !is_int12(Hi52)) {
+    if (is_int32((uint64_t)Hi52 << 12)) {
+      // Reduce the shift amount and add zeros to the LSBs so it will match
+      // LUI.
+      ShiftAmount -= 12;
+      Hi52 = (uint64_t)Hi52 << 12;
+    }
+  }
+
+  count += RecursiveLiImplCount(Hi52);
+
+  if (Unsigned) {
+  } else {
+    // slli(rd, rd, ShiftAmount);
+    count++;
+  }
+  if (Lo12) {
+    // addi(rd, rd, Lo12);
+    count++;
+  }
+  return count;
+}
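The leading-zero normalisation in RecursiveLi/RecursiveLiCount pays off for masks of 32 or more trailing ones, as the comment in RecursiveLiCount says. A count-only sketch (helper names ours, C++20 for <bit>) reproduces the effect for a 33-one mask: the plain recursion prices it at ADDI+SLLI+ADDI, the normalised form at LUI+SRLI.

#include <bit>
#include <cassert>
#include <cstdint>

int64_t SignExtend(uint64_t v, int n) {
  return static_cast<int64_t>(v << (64 - n)) >> (64 - n);
}

// Mirrors RecursiveLiImplCount: price the base case, then one slli per
// recursion level plus an addi whenever the stripped low bits are nonzero.
int ImplCount(int64_t val) {
  if (val >= INT32_MIN && val <= INT32_MAX) {
    int64_t hi20 = ((val + 0x800) >> 12) & 0xfffff;
    int64_t lo12 = SignExtend(static_cast<uint64_t>(val), 12);
    return (hi20 ? 1 : 0) + ((lo12 || hi20 == 0) ? 1 : 0);
  }
  uint64_t hi52 = (static_cast<uint64_t>(val) + 0x800) >> 12;
  int shift = 12 + std::countr_zero(hi52);
  int64_t rest = SignExtend(hi52 >> (shift - 12), 64 - shift);
  int lo12_cost = SignExtend(static_cast<uint64_t>(val), 12) ? 1 : 0;
  return ImplCount(rest) + 1 /* slli */ + lo12_cost;
}

// Mirrors RecursiveLiCount: shift the value up against the MSB (filling the
// bits that will be shifted back out with 1s) and add one srli if cheaper.
int CountWithNormalisation(int64_t val) {
  if (val > 0 && ImplCount(val) > 2) {
    int lz = std::countl_zero(static_cast<uint64_t>(val));
    uint64_t shifted = static_cast<uint64_t>(val) << lz;
    int with_srli = ImplCount(static_cast<int64_t>(shifted)) + 1;
    if (with_srli < ImplCount(val)) return with_srli;
  }
  return ImplCount(val);
}

int main() {
  int64_t mask33 = (INT64_C(1) << 33) - 1;      // 0x1ffffffff
  assert(ImplCount(mask33) == 3);               // addi 1; slli 33; addi -1
  assert(CountWithNormalisation(mask33) == 2);  // lui 0x80000; srli 31
  return 0;
}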
+
+int Assembler::GeneralLiCount(int64_t imm, bool is_get_temp_reg) {
+  int count = 0;
+  // imitate Assembler::RV_li
+  if (is_int32(imm + 0x800)) {
+    // 32-bit case. Maximum of 2 instructions generated
+    int64_t high_20 = ((imm + 0x800) >> 12);
+    int64_t low_12 = imm << 52 >> 52;
+    if (high_20) {
+      count++;
+      if (low_12) {
+        count++;
+      }
+    } else {
+      count++;
+    }
+    return count;
+  } else {
+    // 64-bit case: divide imm into two 32-bit parts, upper and lower
+    int64_t up_32 = imm >> 32;
+    int64_t low_32 = imm & 0xffffffffull;
+    // Check if a temporary register is available
+    if (is_get_temp_reg) {
+      // keep track of hardware behavior for lower part in sim_low
+      int64_t sim_low = 0;
+      // Build lower part
+      if (low_32 != 0) {
+        int64_t high_20 = ((low_32 + 0x800) >> 12);
+        int64_t low_12 = low_32 & 0xfff;
+        if (high_20) {
+          // Adjust to 20 bits for the case of overflow
+          high_20 &= 0xfffff;
+          sim_low = ((high_20 << 12) << 32) >> 32;
+          count++;
+          if (low_12) {
+            sim_low += (low_12 << 52 >> 52) | low_12;
+            count++;
+          }
+        } else {
+          sim_low = low_12;
+          count++;
+        }
+      }
+      if (sim_low & 0x100000000) {
+        // Bit 31 is 1. Either an overflow or a negative 64 bit
+        if (up_32 == 0) {
+          // Positive number, but overflow because of the add 0x800
+          count++;
+          count++;
+          return count;
+        }
+        // low_32 is a negative 64 bit after the build
+        up_32 = (up_32 - 0xffffffff) & 0xffffffff;
+      }
+      if (up_32 == 0) {
+        return count;
+      }
+      int64_t high_20 = (up_32 + 0x800) >> 12;
+      int64_t low_12 = up_32 & 0xfff;
+      if (high_20) {
+        // Adjust to 20 bits for the case of overflow
+        high_20 &= 0xfffff;
+        count++;
+        if (low_12) {
+          count++;
+        }
+      } else {
+        count++;
+      }
+      // Put it at the bgining of register
+      count++;
+      if (low_32 != 0) {
+        count++;
+      }
+      return count;
+    }
+    // No temp register. Build imm in rd.
+    // Build upper 32 bits first in rd. Divide lower 32 bits parts and add
+    // parts to the upper part by doing shift and add.
+    // First build upper part in rd.
+    int64_t high_20 = (up_32 + 0x800) >> 12;
+    int64_t low_12 = up_32 & 0xfff;
+    if (high_20) {
+      // Adjust to 20 bits for the case of overflow
+      high_20 &= 0xfffff;
+      count++;
+      if (low_12) {
+        count++;
+      }
+    } else {
+      count++;
+    }
+    // upper part already in rd. Each part to be added to rd, has maximum of
+    // 11 bits, and always starts with a 1. rd is shifted by the size of the
+    // part plus the number of zeros between the parts. Each part is added
+    // after the left shift.
+    uint32_t mask = 0x80000000;
+    int32_t i;
+    for (i = 0; i < 32; i++) {
+      if ((low_32 & mask) == 0) {
+        mask >>= 1;
+        if (i == 31) {
+          // rest is zero
+          count++;
+        }
+        continue;
+      }
+      // The first 1 seen
+      if ((i + 11) < 32) {
+        // Pick 11 bits
+        count++;
+        count++;
+        i += 10;
+        mask >>= 11;
+      } else {
+        count++;
+        count++;
+        break;
+      }
+    }
+  }
+  return count;
+}
+#endif
+
 }  // namespace internal
 }  // namespace v8
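The 11-bit chunk loop at the end of GeneralLiCount is the cost model that RV_li weighs against RecursiveLiCount. A standalone copy of just that loop (the helper name is ours) makes the pricing concrete: each run of set bits in the low word costs one SLLI over the preceding gap plus one ADDI carrying at most 11 payload bits, since a 12th bit would flip ADDI's sign.

#include <cassert>
#include <cstdint>

// Prices the low 32 bits on GeneralLiCount's no-scratch-register path,
// with the upper 32 bits assumed to be in rd already.
int CountLow32Chunks(uint32_t low_32) {
  int count = 0;
  uint32_t mask = 0x80000000;
  for (int32_t i = 0; i < 32; i++) {
    if ((low_32 & mask) == 0) {
      mask >>= 1;
      if (i == 31) count++;  // rest is zero: one final slli of the upper part
      continue;
    }
    count += 2;  // slli over the gap, then addi with this chunk
    if (i + 11 < 32) {
      i += 10;  // the loop increment makes it 11 bits consumed
      mask >>= 11;
    } else {
      break;  // chunk reaches bit 0
    }
  }
  return count;
}

int main() {
  assert(CountLow32Chunks(0x00801001u) == 6);  // 3 isolated bits: 3 pairs
  assert(CountLow32Chunks(0u) == 1);           // only the final shift
  return 0;
}

Sparse low words like 0x00801001 are exactly where the LLVM recursion tends to win, which is why the patch makes RV_li compare both counts before emitting.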