Diffstat (limited to 'deps/v8/src/codegen/riscv/assembler-riscv.cc')
-rw-r--r--  deps/v8/src/codegen/riscv/assembler-riscv.cc | 498
1 file changed, 360 insertions(+), 138 deletions(-)
diff --git a/deps/v8/src/codegen/riscv/assembler-riscv.cc b/deps/v8/src/codegen/riscv/assembler-riscv.cc
index f8c2e33051..b6844a6f37 100644
--- a/deps/v8/src/codegen/riscv/assembler-riscv.cc
+++ b/deps/v8/src/codegen/riscv/assembler-riscv.cc
@@ -34,6 +34,7 @@
 
 #include "src/codegen/riscv/assembler-riscv.h"
 
+#include "src/base/bits.h"
 #include "src/base/cpu.h"
 #include "src/codegen/assembler-inl.h"
 #include "src/codegen/safepoint-table.h"
@@ -132,10 +133,8 @@ Register ToRegister(int num) {
 
 const int RelocInfo::kApplyMask =
     RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE) |
-    RelocInfo::ModeMask(RelocInfo::NEAR_BUILTIN_ENTRY) |
     RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE_ENCODED) |
-    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET) |
-    RelocInfo::ModeMask(RelocInfo::CODE_TARGET);
+    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);
 
 bool RelocInfo::IsCodedSpecially() {
   // The deserializer needs to know whether a pointer is specially coded. Being
@@ -225,13 +224,13 @@ void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                         SafepointTableBuilder* safepoint_table_builder,
                         int handler_table_offset) {
   // As a crutch to avoid having to add manual Align calls wherever we use a
-  // raw workflow to create Code objects (mostly in tests), add another Align
-  // call here. It does no harm - the end of the Code object is aligned to the
-  // (larger) kCodeAlignment anyways.
+  // raw workflow to create InstructionStream objects (mostly in tests), add
+  // another Align call here. It does no harm - the end of the InstructionStream
+  // object is aligned to the (larger) kCodeAlignment anyways.
   // TODO(jgruber): Consider moving responsibility for proper alignment to
   // metadata table builders (safepoint, handler, constant pool, code
   // comments).
-  DataAlign(Code::kMetadataAlignment);
+  DataAlign(InstructionStream::kMetadataAlignment);
 
   ForceConstantPoolEmissionWithoutJump();
 
@@ -564,8 +563,10 @@ void Assembler::target_at_put(int pos, int target_pos, bool is_internal,
     } break;
    default: {  // Emitted label constant, not part of a branch.
-      // Make label relative to Code pointer of generated Code object.
-      instr_at_put(pos, target_pos + (Code::kHeaderSize - kHeapObjectTag));
+      // Make label relative to Code pointer of generated InstructionStream
+      // object.
+      instr_at_put(
+          pos, target_pos + (InstructionStream::kHeaderSize - kHeapObjectTag));
    } break;
  }
  disassembleInstr(instr);
@@ -839,7 +840,8 @@ void Assembler::label_at_put(Label* L, int at_offset) {
       reinterpret_cast<Instr*>(buffer_start_ + at_offset), at_offset);
   if (L->is_bound()) {
     target_pos = L->pos();
-    instr_at_put(at_offset, target_pos + (Code::kHeaderSize - kHeapObjectTag));
+    instr_at_put(at_offset, target_pos + (InstructionStream::kHeaderSize -
+                                          kHeapObjectTag));
   } else {
     if (L->is_linked()) {
       target_pos = L->pos();  // L's link.
@@ -884,8 +886,29 @@ void Assembler::EBREAK() {
 
 void Assembler::nop() { addi(ToRegister(0), ToRegister(0), 0); }
 
+inline int64_t signExtend(uint64_t V, int N) {
+  return int64_t(V << (64 - N)) >> (64 - N);
+}
+
 #if V8_TARGET_ARCH_RISCV64
 void Assembler::RV_li(Register rd, int64_t imm) {
+  UseScratchRegisterScope temps(this);
+  if (RecursiveLiCount(imm) > GeneralLiCount(imm, temps.hasAvailable())) {
+    GeneralLi(rd, imm);
+  } else {
+    RecursiveLi(rd, imm);
+  }
+}
+
+int Assembler::RV_li_count(int64_t imm, bool is_get_temp_reg) {
+  if (RecursiveLiCount(imm) > GeneralLiCount(imm, is_get_temp_reg)) {
+    return GeneralLiCount(imm, is_get_temp_reg);
+  } else {
+    return RecursiveLiCount(imm);
+  }
+}
+
+void Assembler::GeneralLi(Register rd, int64_t imm) {
   // 64-bit imm is put in the register rd.
   // In most cases the imm is 32 bit and 2 instructions are generated. If a
   // temporary register is available, in the worst case, 6 instructions are
@@ -913,6 +936,7 @@ void Assembler::RV_li(Register rd, int64_t imm) {
     }
     return;
   } else {
+    UseScratchRegisterScope temps(this);
     // 64-bit case: divide imm into two 32-bit parts, upper and lower
     int64_t up_32 = imm >> 32;
     int64_t low_32 = imm & 0xffffffffull;
@@ -921,7 +945,6 @@ void Assembler::RV_li(Register rd, int64_t imm) {
     if (up_32 == 0 || low_32 == 0) {
       // No temp register is needed
     } else {
-      UseScratchRegisterScope temps(this);
       BlockTrampolinePoolScope block_trampoline_pool(this);
       temp_reg = temps.hasAvailable() ? temps.Acquire() : no_reg;
     }
@@ -1037,129 +1060,6 @@ void Assembler::RV_li(Register rd, int64_t imm) {
   }
 }
 
-int Assembler::li_estimate(int64_t imm, bool is_get_temp_reg) {
-  int count = 0;
-  // imitate Assembler::RV_li
-  if (is_int32(imm + 0x800)) {
-    // 32-bit case. Maximum of 2 instructions generated
-    int64_t high_20 = ((imm + 0x800) >> 12);
-    int64_t low_12 = imm << 52 >> 52;
-    if (high_20) {
-      count++;
-      if (low_12) {
-        count++;
-      }
-    } else {
-      count++;
-    }
-    return count;
-  } else {
-    // 64-bit case: divide imm into two 32-bit parts, upper and lower
-    int64_t up_32 = imm >> 32;
-    int64_t low_32 = imm & 0xffffffffull;
-    // Check if a temporary register is available
-    if (is_get_temp_reg) {
-      // keep track of hardware behavior for lower part in sim_low
-      int64_t sim_low = 0;
-      // Build lower part
-      if (low_32 != 0) {
-        int64_t high_20 = ((low_32 + 0x800) >> 12);
-        int64_t low_12 = low_32 & 0xfff;
-        if (high_20) {
-          // Adjust to 20 bits for the case of overflow
-          high_20 &= 0xfffff;
-          sim_low = ((high_20 << 12) << 32) >> 32;
-          count++;
-          if (low_12) {
-            sim_low += (low_12 << 52 >> 52) | low_12;
-            count++;
-          }
-        } else {
-          sim_low = low_12;
-          count++;
-        }
-      }
-      if (sim_low & 0x100000000) {
-        // Bit 31 is 1. Either an overflow or a negative 64 bit
-        if (up_32 == 0) {
-          // Positive number, but overflow because of the add 0x800
-          count++;
-          count++;
-          return count;
-        }
-        // low_32 is a negative 64 bit after the build
-        up_32 = (up_32 - 0xffffffff) & 0xffffffff;
-      }
-      if (up_32 == 0) {
-        return count;
-      }
-      int64_t high_20 = (up_32 + 0x800) >> 12;
-      int64_t low_12 = up_32 & 0xfff;
-      if (high_20) {
-        // Adjust to 20 bits for the case of overflow
-        high_20 &= 0xfffff;
-        count++;
-        if (low_12) {
-          count++;
-        }
-      } else {
-        count++;
-      }
-      // Put it at the bgining of register
-      count++;
-      if (low_32 != 0) {
-        count++;
-      }
-      return count;
-    }
-    // No temp register. Build imm in rd.
-    // Build upper 32 bits first in rd. Divide lower 32 bits parts and add
-    // parts to the upper part by doing shift and add.
-    // First build upper part in rd.
-    int64_t high_20 = (up_32 + 0x800) >> 12;
-    int64_t low_12 = up_32 & 0xfff;
-    if (high_20) {
-      // Adjust to 20 bits for the case of overflow
-      high_20 &= 0xfffff;
-      count++;
-      if (low_12) {
-        count++;
-      }
-    } else {
-      count++;
-    }
-    // upper part already in rd. Each part to be added to rd, has maximum of 11
-    // bits, and always starts with a 1. rd is shifted by the size of the part
-    // plus the number of zeros between the parts. Each part is added after the
-    // left shift.
-    uint32_t mask = 0x80000000;
-    int32_t i;
-    for (i = 0; i < 32; i++) {
-      if ((low_32 & mask) == 0) {
-        mask >>= 1;
-        if (i == 31) {
-          // rest is zero
-          count++;
-        }
-        continue;
-      }
-      // The first 1 seen
-      if ((i + 11) < 32) {
-        // Pick 11 bits
-        count++;
-        count++;
-        i += 10;
-        mask >>= 11;
-      } else {
-        count++;
-        count++;
-        break;
-      }
-    }
-  }
-  return count;
-}
-
 void Assembler::li_ptr(Register rd, int64_t imm) {
   // Initialize rd with an address
   // Pointers are 48 bits
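The +0x800 rounding that both RV_li and the estimator lean on can be checked in isolation. A minimal standalone sketch, not part of the patch (the harness and names are ours): it verifies that the LUI operand computed from imm + 0x800 exactly cancels the borrow introduced by ADDI's sign-extended low part.

#include <cassert>
#include <cstdint>

// When bit 11 of imm is set, the sign-extended ADDI operand contributes
// (imm & 0xfff) - 0x1000; deriving the high part from imm + 0x800 absorbs
// exactly that -0x1000 borrow, so LUI + ADDI reproduces imm.
int main() {
  for (int64_t imm = -(INT64_C(1) << 35); imm <= (INT64_C(1) << 35);
       imm += 0x7ff) {
    int64_t hi20 = (imm + 0x800) >> 12;  // LUI operand, before 20-bit masking
    int64_t lo12 = static_cast<int64_t>(static_cast<uint64_t>(imm) << 52) >>
                   52;  // sign-extended low 12 bits, as in `imm << 52 >> 52`
    assert((hi20 << 12) + lo12 == imm);
  }
  // RV_li takes the 2-instruction path only while is_int32(imm + 0x800)
  // holds, i.e. while hi20 still fits LUI's 20-bit immediate field.
  return 0;
}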
@@ -1207,7 +1107,7 @@ void Assembler::RV_li(Register rd, int32_t imm) {
   }
 }
 
-int Assembler::li_estimate(int32_t imm, bool is_get_temp_reg) {
+int Assembler::RV_li_count(int32_t imm, bool is_get_temp_reg) {
   int count = 0;
   // imitate Assembler::RV_li
   int32_t high_20 = ((imm + 0x800) >> 12);
@@ -1250,8 +1150,8 @@ void Assembler::break_(uint32_t code, bool break_as_stop) {
   // simulator expects a char pointer after the stop instruction.
   // See constants-mips.h for explanation.
   DCHECK(
-      (break_as_stop && code <= kMaxStopCode && code > kMaxWatchpointCode) ||
-      (!break_as_stop && (code > kMaxStopCode || code <= kMaxWatchpointCode)));
+      (break_as_stop && code <= kMaxStopCode && code > kMaxTracepointCode) ||
+      (!break_as_stop && (code > kMaxStopCode || code <= kMaxTracepointCode)));
 
   // since ebreak does not allow additional immediate field, we use the
   // immediate field of lui instruction immediately following the ebreak to
@@ -1451,7 +1351,8 @@ void Assembler::dd(Label* label) {
 void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
   if (!ShouldRecordRelocInfo(rmode)) return;
   // We do not try to reuse pool constants.
-  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
+  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code(),
+                  InstructionStream());
   DCHECK_GE(buffer_space(), kMaxRelocSize);  // Too late to grow buffer here.
   reloc_info_writer.Write(&rinfo);
 }
@@ -1909,5 +1810,326 @@ const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
 const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
 const size_t ConstantPool::kApproxMaxEntryCount = 512;
 
+#if defined(V8_TARGET_ARCH_RISCV64)
+// LLVM Code
+//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++
+//-*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
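The LLVM-derived routines below rely on the signExtend helper added near RV_li earlier in this patch. A quick standalone check of the shift trick (our file, not V8's; it assumes the usual arithmetic right shift of signed values, which the patch assumes as well):

#include <cassert>
#include <cstdint>

// signExtend(V, N) moves the low N bits to the top of the 64-bit word and
// arithmetic-shifts them back, replicating bit N-1 through all upper bits.
int64_t signExtend(uint64_t V, int N) {
  return int64_t(V << (64 - N)) >> (64 - N);
}

int main() {
  assert(signExtend(0x7ff, 12) == 0x7ff);  // sign bit clear: value unchanged
  assert(signExtend(0xfff, 12) == -1);     // sign bit set: ones fill the top
  assert(signExtend(0x800, 12) == -0x800);
  assert(signExtend(0x80000000u, 32) == -(INT64_C(1) << 31));
  return 0;
}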
+void Assembler::RecursiveLi(Register rd, int64_t val) {
+  if (val > 0 && RecursiveLiImplCount(val) > 2) {
+    unsigned LeadingZeros = base::bits::CountLeadingZeros((uint64_t)val);
+    uint64_t ShiftedVal = (uint64_t)val << LeadingZeros;
+    int countFillZero = RecursiveLiImplCount(ShiftedVal) + 1;
+    if (countFillZero < RecursiveLiImplCount(val)) {
+      RecursiveLiImpl(rd, ShiftedVal);
+      srli(rd, rd, LeadingZeros);
+      return;
+    }
+  }
+  RecursiveLiImpl(rd, val);
+}
+
+int Assembler::RecursiveLiCount(int64_t val) {
+  if (val > 0 && RecursiveLiImplCount(val) > 2) {
+    unsigned LeadingZeros = base::bits::CountLeadingZeros((uint64_t)val);
+    uint64_t ShiftedVal = (uint64_t)val << LeadingZeros;
+    // Fill in the bits that will be shifted out with 1s. An example where
+    // this helps is trailing one masks with 32 or more ones. This will
+    // generate ADDI -1 and an SRLI.
+    int countFillZero = RecursiveLiImplCount(ShiftedVal) + 1;
+    if (countFillZero < RecursiveLiImplCount(val)) {
+      return countFillZero;
+    }
+  }
+  return RecursiveLiImplCount(val);
+}
+
+void Assembler::RecursiveLiImpl(Register rd, int64_t Val) {
+  if (is_int32(Val)) {
+    // Depending on the active bits in the immediate Value v, the following
+    // instruction sequences are emitted:
+    //
+    // v == 0                        : ADDI
+    // v[0,12) != 0 && v[12,32) == 0 : ADDI
+    // v[0,12) == 0 && v[12,32) != 0 : LUI
+    // v[0,32) != 0                  : LUI+ADDI(W)
+    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+    int64_t Lo12 = Val << 52 >> 52;
+
+    if (Hi20) {
+      lui(rd, (int32_t)Hi20);
+    }
+
+    if (Lo12 || Hi20 == 0) {
+      if (Hi20) {
+        addiw(rd, rd, Lo12);
+      } else {
+        addi(rd, zero_reg, Lo12);
+      }
+    }
+    return;
+  }
+
+  // In the worst case, for a full 64-bit constant, a sequence of 8
+  // instructions (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be
+  // emitted. Note that the first two instructions (LUI+ADDIW) can contribute
+  // up to 32 bits while the following ADDI instructions contribute up to 12
+  // bits each.
+  //
+  // On the first glance, implementing this seems to be possible by simply
+  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many
+  // left shift (SLLI) and immediate additions (ADDI) as needed. However, due
+  // to the fact that ADDI performs a sign extended addition, doing it like
+  // that would only be possible when at most 11 bits of the ADDI instructions
+  // are used. Using all 12 bits of the ADDI instructions, like done by GAS,
+  // actually requires that the constant is processed starting with the least
+  // significant bit.
+  //
+  // In the following, constants are processed from LSB to MSB but instruction
+  // emission is performed from MSB to LSB by recursively calling
+  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+  // from the constant and the optimal shift amount, which can be greater than
+  // 12 bits if the constant is sparse, is determined. Then, the shifted
+  // remaining constant is processed recursively and gets emitted as soon as
+  // it fits into 32 bits. The emission of the shifts and additions is
+  // subsequently performed when the recursion returns.
+
+  int64_t Lo12 = Val << 52 >> 52;
+  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
+  int ShiftAmount = 12 + base::bits::CountTrailingZeros((uint64_t)Hi52);
+  Hi52 = signExtend(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+  // If the remaining bits don't fit in 12 bits, we might be able to reduce
+  // the shift amount in order to use LUI which will zero the lower 12 bits.
+  bool Unsigned = false;
+  if (ShiftAmount > 12 && !is_int12(Hi52)) {
+    if (is_int32((uint64_t)Hi52 << 12)) {
+      // Reduce the shift amount and add zeros to the LSBs so it will match
+      // LUI.
+      ShiftAmount -= 12;
+      Hi52 = (uint64_t)Hi52 << 12;
+    }
+  }
+  RecursiveLi(rd, Hi52);
+
+  if (Unsigned) {
+  } else {
+    slli(rd, rd, ShiftAmount);
+  }
+  if (Lo12) {
+    addi(rd, rd, Lo12);
+  }
+}
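RecursiveLiImpl transliterates LLVM's generateInstSeq. A compact standalone model of the same recursion, under stated assumptions (our names; it records (op, imm) pairs and replays them with RV64 semantics instead of emitting instructions, omits the LUI shift-reduction branch since that is only an optimization, and needs C++20 for std::countr_zero), shows the LSB-first decomposition round-tripping sample constants in at most 8 instructions:

#include <bit>
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

struct Inst {
  std::string op;
  int64_t imm;
};

int64_t SignExtend(uint64_t v, int n) {
  return static_cast<int64_t>(v << (64 - n)) >> (64 - n);
}

// Constants are processed LSB-first; emission comes out MSB-first because
// the remainder is recursed on before the slli/addi for the stripped bits.
void GenerateInstSeq(int64_t val, std::vector<Inst>* seq) {
  if (val >= INT32_MIN && val <= INT32_MAX) {
    int64_t hi20 = ((val + 0x800) >> 12) & 0xfffff;
    int64_t lo12 = SignExtend(static_cast<uint64_t>(val), 12);
    if (hi20) seq->push_back({"lui", hi20});
    if (lo12 || hi20 == 0) seq->push_back({hi20 ? "addiw" : "addi", lo12});
    return;
  }
  int64_t lo12 = SignExtend(static_cast<uint64_t>(val), 12);
  uint64_t hi52 = (static_cast<uint64_t>(val) + 0x800) >> 12;
  int shift = 12 + std::countr_zero(hi52);  // > 12 if the constant is sparse
  int64_t rest = SignExtend(hi52 >> (shift - 12), 64 - shift);
  GenerateInstSeq(rest, seq);
  seq->push_back({"slli", shift});
  if (lo12) seq->push_back({"addi", lo12});
}

// Replay with RV64 semantics: LUI and ADDIW sign-extend their 32-bit result.
uint64_t Replay(const std::vector<Inst>& seq) {
  uint64_t rd = 0;
  for (const Inst& i : seq) {
    uint64_t imm = static_cast<uint64_t>(i.imm);
    if (i.op == "lui") rd = SignExtend(imm << 12, 32);
    if (i.op == "addi") rd += imm;  // wraps mod 2^64
    if (i.op == "addiw") rd = SignExtend(rd + imm, 32);
    if (i.op == "slli") rd <<= i.imm;
  }
  return rd;
}

int main() {
  for (int64_t v : {INT64_C(0x12345678) << 16, -INT64_C(0x123456789ab),
                    INT64_C(0x00007fffffff0001), INT64_C(1) << 62}) {
    std::vector<Inst> seq;
    GenerateInstSeq(v, &seq);
    assert(seq.size() <= 8);  // worst case: lui+addiw+3*(slli+addi)
    assert(Replay(seq) == static_cast<uint64_t>(v));
  }
  return 0;
}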
+
+int Assembler::RecursiveLiImplCount(int64_t Val) {
+  int count = 0;
+  if (is_int32(Val)) {
+    // Depending on the active bits in the immediate Value v, the following
+    // instruction sequences are emitted:
+    //
+    // v == 0                        : ADDI
+    // v[0,12) != 0 && v[12,32) == 0 : ADDI
+    // v[0,12) == 0 && v[12,32) != 0 : LUI
+    // v[0,32) != 0                  : LUI+ADDI(W)
+    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
+    int64_t Lo12 = Val << 52 >> 52;
+
+    if (Hi20) {
+      // lui(rd, (int32_t)Hi20);
+      count++;
+    }
+
+    if (Lo12 || Hi20 == 0) {
+      // unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+      // Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
+      count++;
+    }
+    return count;
+  }
+
+  // In the worst case, for a full 64-bit constant, a sequence of 8
+  // instructions (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be
+  // emitted. Note that the first two instructions (LUI+ADDIW) can contribute
+  // up to 32 bits while the following ADDI instructions contribute up to 12
+  // bits each.
+  //
+  // On the first glance, implementing this seems to be possible by simply
+  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many
+  // left shift (SLLI) and immediate additions (ADDI) as needed. However, due
+  // to the fact that ADDI performs a sign extended addition, doing it like
+  // that would only be possible when at most 11 bits of the ADDI instructions
+  // are used. Using all 12 bits of the ADDI instructions, like done by GAS,
+  // actually requires that the constant is processed starting with the least
+  // significant bit.
+  //
+  // In the following, constants are processed from LSB to MSB but instruction
+  // emission is performed from MSB to LSB by recursively calling
+  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
+  // from the constant and the optimal shift amount, which can be greater than
+  // 12 bits if the constant is sparse, is determined. Then, the shifted
+  // remaining constant is processed recursively and gets emitted as soon as
+  // it fits into 32 bits. The emission of the shifts and additions is
+  // subsequently performed when the recursion returns.
+
+  int64_t Lo12 = Val << 52 >> 52;
+  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
+  int ShiftAmount = 12 + base::bits::CountTrailingZeros((uint64_t)Hi52);
+  Hi52 = signExtend(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+
+  // If the remaining bits don't fit in 12 bits, we might be able to reduce
+  // the shift amount in order to use LUI which will zero the lower 12 bits.
+  bool Unsigned = false;
+  if (ShiftAmount > 12 && !is_int12(Hi52)) {
+    if (is_int32((uint64_t)Hi52 << 12)) {
+      // Reduce the shift amount and add zeros to the LSBs so it will match
+      // LUI.
+      ShiftAmount -= 12;
+      Hi52 = (uint64_t)Hi52 << 12;
+    }
+  }
+
+  count += RecursiveLiImplCount(Hi52);
+
+  if (Unsigned) {
+  } else {
+    // slli(rd, rd, ShiftAmount);
+    count++;
+  }
+  if (Lo12) {
+    // addi(rd, rd, Lo12);
+    count++;
+  }
+  return count;
+}
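The leading-zero normalisation in RecursiveLi/RecursiveLiCount pays off for masks of 32 or more trailing ones, as the comment in RecursiveLiCount says. A count-only sketch (helper names ours, C++20 for <bit>) reproduces the effect for a 33-one mask: the plain recursion prices it at ADDI+SLLI+ADDI, the normalised form at LUI+SRLI.

#include <bit>
#include <cassert>
#include <cstdint>

int64_t SignExtend(uint64_t v, int n) {
  return static_cast<int64_t>(v << (64 - n)) >> (64 - n);
}

// Mirrors RecursiveLiImplCount: price the base case, then one slli per
// recursion level plus an addi whenever the stripped low bits are nonzero.
int ImplCount(int64_t val) {
  if (val >= INT32_MIN && val <= INT32_MAX) {
    int64_t hi20 = ((val + 0x800) >> 12) & 0xfffff;
    int64_t lo12 = SignExtend(static_cast<uint64_t>(val), 12);
    return (hi20 ? 1 : 0) + ((lo12 || hi20 == 0) ? 1 : 0);
  }
  uint64_t hi52 = (static_cast<uint64_t>(val) + 0x800) >> 12;
  int shift = 12 + std::countr_zero(hi52);
  int64_t rest = SignExtend(hi52 >> (shift - 12), 64 - shift);
  int lo12_cost = SignExtend(static_cast<uint64_t>(val), 12) ? 1 : 0;
  return ImplCount(rest) + 1 /* slli */ + lo12_cost;
}

// Mirrors RecursiveLiCount: shift the value up against the MSB (filling the
// bits that will be shifted back out with 1s) and add one srli if cheaper.
int CountWithNormalisation(int64_t val) {
  if (val > 0 && ImplCount(val) > 2) {
    int lz = std::countl_zero(static_cast<uint64_t>(val));
    uint64_t shifted = static_cast<uint64_t>(val) << lz;
    int with_srli = ImplCount(static_cast<int64_t>(shifted)) + 1;
    if (with_srli < ImplCount(val)) return with_srli;
  }
  return ImplCount(val);
}

int main() {
  int64_t mask33 = (INT64_C(1) << 33) - 1;      // 0x1ffffffff
  assert(ImplCount(mask33) == 3);               // addi 1; slli 33; addi -1
  assert(CountWithNormalisation(mask33) == 2);  // lui 0x80000; srli 31
  return 0;
}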
+
+int Assembler::GeneralLiCount(int64_t imm, bool is_get_temp_reg) {
+  int count = 0;
+  // imitate Assembler::RV_li
+  if (is_int32(imm + 0x800)) {
+    // 32-bit case. Maximum of 2 instructions generated
+    int64_t high_20 = ((imm + 0x800) >> 12);
+    int64_t low_12 = imm << 52 >> 52;
+    if (high_20) {
+      count++;
+      if (low_12) {
+        count++;
+      }
+    } else {
+      count++;
+    }
+    return count;
+  } else {
+    // 64-bit case: divide imm into two 32-bit parts, upper and lower
+    int64_t up_32 = imm >> 32;
+    int64_t low_32 = imm & 0xffffffffull;
+    // Check if a temporary register is available
+    if (is_get_temp_reg) {
+      // keep track of hardware behavior for lower part in sim_low
+      int64_t sim_low = 0;
+      // Build lower part
+      if (low_32 != 0) {
+        int64_t high_20 = ((low_32 + 0x800) >> 12);
+        int64_t low_12 = low_32 & 0xfff;
+        if (high_20) {
+          // Adjust to 20 bits for the case of overflow
+          high_20 &= 0xfffff;
+          sim_low = ((high_20 << 12) << 32) >> 32;
+          count++;
+          if (low_12) {
+            sim_low += (low_12 << 52 >> 52) | low_12;
+            count++;
+          }
+        } else {
+          sim_low = low_12;
+          count++;
+        }
+      }
+      if (sim_low & 0x100000000) {
+        // Bit 31 is 1. Either an overflow or a negative 64 bit
+        if (up_32 == 0) {
+          // Positive number, but overflow because of the add 0x800
+          count++;
+          count++;
+          return count;
+        }
+        // low_32 is a negative 64 bit after the build
+        up_32 = (up_32 - 0xffffffff) & 0xffffffff;
+      }
+      if (up_32 == 0) {
+        return count;
+      }
+      int64_t high_20 = (up_32 + 0x800) >> 12;
+      int64_t low_12 = up_32 & 0xfff;
+      if (high_20) {
+        // Adjust to 20 bits for the case of overflow
+        high_20 &= 0xfffff;
+        count++;
+        if (low_12) {
+          count++;
+        }
+      } else {
+        count++;
+      }
+      // Put it at the bgining of register
+      count++;
+      if (low_32 != 0) {
+        count++;
+      }
+      return count;
+    }
+    // No temp register. Build imm in rd.
+    // Build upper 32 bits first in rd. Divide lower 32 bits parts and add
+    // parts to the upper part by doing shift and add.
+    // First build upper part in rd.
+    int64_t high_20 = (up_32 + 0x800) >> 12;
+    int64_t low_12 = up_32 & 0xfff;
+    if (high_20) {
+      // Adjust to 20 bits for the case of overflow
+      high_20 &= 0xfffff;
+      count++;
+      if (low_12) {
+        count++;
+      }
+    } else {
+      count++;
+    }
+    // upper part already in rd. Each part to be added to rd, has maximum of
+    // 11 bits, and always starts with a 1. rd is shifted by the size of the
+    // part plus the number of zeros between the parts. Each part is added
+    // after the left shift.
+    uint32_t mask = 0x80000000;
+    int32_t i;
+    for (i = 0; i < 32; i++) {
+      if ((low_32 & mask) == 0) {
+        mask >>= 1;
+        if (i == 31) {
+          // rest is zero
+          count++;
+        }
+        continue;
+      }
+      // The first 1 seen
+      if ((i + 11) < 32) {
+        // Pick 11 bits
+        count++;
+        count++;
+        i += 10;
+        mask >>= 11;
+      } else {
+        count++;
+        count++;
+        break;
+      }
+    }
+  }
+  return count;
+}
+#endif
+
 }  // namespace internal
 }  // namespace v8
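The 11-bit chunk loop at the end of GeneralLiCount is the cost model that RV_li weighs against RecursiveLiCount. A standalone copy of just that loop (the helper name is ours) makes the pricing concrete: each run of set bits in the low word costs one SLLI over the preceding gap plus one ADDI carrying at most 11 payload bits, since a 12th bit would flip ADDI's sign.

#include <cassert>
#include <cstdint>

// Prices the low 32 bits on GeneralLiCount's no-scratch-register path,
// with the upper 32 bits assumed to be in rd already.
int CountLow32Chunks(uint32_t low_32) {
  int count = 0;
  uint32_t mask = 0x80000000;
  for (int32_t i = 0; i < 32; i++) {
    if ((low_32 & mask) == 0) {
      mask >>= 1;
      if (i == 31) count++;  // rest is zero: one final slli of the upper part
      continue;
    }
    count += 2;  // slli over the gap, then addi with this chunk
    if (i + 11 < 32) {
      i += 10;  // the loop increment makes it 11 bits consumed
      mask >>= 11;
    } else {
      break;  // chunk reaches bit 0
    }
  }
  return count;
}

int main() {
  assert(CountLow32Chunks(0x00801001u) == 6);  // 3 isolated bits: 3 pairs
  assert(CountLow32Chunks(0u) == 1);           // only the final shift
  return 0;
}

Sparse low words like 0x00801001 are exactly where the LLVM recursion tends to win, which is why the patch makes RV_li compare both counts before emitting.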