1 files changed, 1193 insertions, 0 deletions
diff --git a/deps/v8/src/compiler/string-builder-optimizer.cc b/deps/v8/src/compiler/string-builder-optimizer.cc
new file mode 100644
index 0000000000..73ad2903b8
--- /dev/null
+++ b/deps/v8/src/compiler/string-builder-optimizer.cc
@@ -0,0 +1,1193 @@
+// Copyright 2022 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/string-builder-optimizer.h"
+
+#include <algorithm>
+
+#include "src/base/bits.h"
+#include "src/base/logging.h"
+#include "src/base/optional.h"
+#include "src/base/small-vector.h"
+#include "src/compiler/access-builder.h"
+#include "src/compiler/graph-assembler.h"
+#include "src/compiler/js-graph.h"
+#include "src/compiler/js-heap-broker.h"
+#include "src/compiler/js-operator.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+#include "src/compiler/node.h"
+#include "src/compiler/opcodes.h"
+#include "src/compiler/operator.h"
+#include "src/compiler/schedule.h"
+#include "src/compiler/types.h"
+#include "src/objects/code.h"
+#include "src/objects/map-inl.h"
+#include "src/utils/utils.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+namespace {
+
+// Returns true if {node} is a kStringConcat or a kNewConsString.
+bool IsConcat(Node* node) {
+  return node->opcode() == IrOpcode::kStringConcat ||
+         node->opcode() == IrOpcode::kNewConsString;
+}
+
+// Returns true if {node} is considered as a literal string by the string
+// builder optimizer:
+//    - it's a literal string
+//    - or it's a kStringFromSingleCharCode
+bool IsLiteralString(Node* node, JSHeapBroker* broker) {
+  switch (node->opcode()) {
+    case IrOpcode::kHeapConstant: {
+      HeapObjectMatcher m(node);
+      return m.HasResolvedValue() && m.Ref(broker).IsString() &&
+             m.Ref(broker).AsString().IsContentAccessible();
+    }
+    case IrOpcode::kStringFromSingleCharCode:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Returns true if {node} has at least one concatenation or phi in its uses.
+bool HasConcatOrPhiUse(Node* node) {
+  for (Node* use : node->uses()) {
+    if (IsConcat(use) || use->opcode() == IrOpcode::kPhi) {
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte(
+    State a, State b) {
+  DCHECK(a != State::kUnknown && b != State::kUnknown);
+  if (a == State::kOneByte && b == State::kOneByte) {
+    return State::kOneByte;
+  }
+  if (a == State::kTwoByte || b == State::kTwoByte) {
+    return State::kTwoByte;
+  }
+  return State::kCantKnow;
+}
+
+base::Optional<std::pair<int64_t, int64_t>> OneOrTwoByteAnalysis::TryGetRange(
+    Node* node) {
+  switch (node->opcode()) {
+    case IrOpcode::kChangeTaggedToFloat64:
+    case IrOpcode::kTruncateFloat64ToWord32:
+      return TryGetRange(node->InputAt(0));
+
+    case IrOpcode::kInt32Add:
+    case IrOpcode::kInt32AddWithOverflow:
+    case IrOpcode::kInt64Add:
+    case IrOpcode::kInt64AddWithOverflow:
+    case IrOpcode::kFloat32Add:
+    case IrOpcode::kFloat64Add: {
+      base::Optional<std::pair<int64_t, int64_t>> left =
+          TryGetRange(node->InputAt(0));
+      base::Optional<std::pair<int64_t, int64_t>> right =
+          TryGetRange(node->InputAt(1));
+      if (left.has_value() && right.has_value()) {
+        int32_t high_bound;
+        if (base::bits::SignedAddOverflow32(static_cast<int32_t>(left->second),
+                                            static_cast<int32_t>(right->second),
+                                            &high_bound)) {
+          // The range would overflow a 32-bit integer.
+          return base::nullopt;
+        }
+        return std::pair{left->first + right->first, high_bound};
+      } else {
+        return base::nullopt;
+      }
+    }
+
+    case IrOpcode::kInt32Sub:
+    case IrOpcode::kInt32SubWithOverflow:
+    case IrOpcode::kInt64Sub:
+    case IrOpcode::kInt64SubWithOverflow:
+    case IrOpcode::kFloat32Sub:
+    case IrOpcode::kFloat64Sub: {
+      base::Optional<std::pair<int64_t, int64_t>> left =
+          TryGetRange(node->InputAt(0));
+      base::Optional<std::pair<int64_t, int64_t>> right =
+          TryGetRange(node->InputAt(1));
+      if (left.has_value() && right.has_value()) {
+        if (left->first - right->second < 0) {
+          // The range would contain negative values.
+          return base::nullopt;
+        }
+        return std::pair{left->first - right->second,
+                         left->second - right->first};
+      } else {
+        return base::nullopt;
+      }
+    }
+
+    case IrOpcode::kWord32And:
+    case IrOpcode::kWord64And: {
+      // Note that the minimal value for "a & b" is always 0, regardless of the
+      // max for "a" or "b". And the maximal value is the min of "max of a" and
+      // "max of b".
+      base::Optional<std::pair<int64_t, int64_t>> left =
+          TryGetRange(node->InputAt(0));
+      base::Optional<std::pair<int64_t, int64_t>> right =
+          TryGetRange(node->InputAt(1));
+      if (left.has_value() && right.has_value()) {
+        return std::pair{0, std::min(left->second, right->second)};
+      } else if (left.has_value()) {
+        return std::pair{0, left->second};
+      } else if (right.has_value()) {
+        return std::pair{0, right->second};
+      } else {
+        return base::nullopt;
+      }
+    }
+
+    case IrOpcode::kInt32Mul:
+    case IrOpcode::kInt32MulWithOverflow:
+    case IrOpcode::kInt64Mul:
+    case IrOpcode::kFloat32Mul:
+    case IrOpcode::kFloat64Mul: {
+      base::Optional<std::pair<int64_t, int64_t>> left =
+          TryGetRange(node->InputAt(0));
+      base::Optional<std::pair<int64_t, int64_t>> right =
+          TryGetRange(node->InputAt(1));
+      if (left.has_value() && right.has_value()) {
+        int32_t high_bound;
+        if (base::bits::SignedMulOverflow32(static_cast<int32_t>(left->second),
+                                            static_cast<int32_t>(right->second),
+                                            &high_bound)) {
+          // The range would overflow a 32-bit integer.
+          return base::nullopt;
+        }
+        return std::pair{left->first * right->first,
+                         left->second * right->second};
+      } else {
+        return base::nullopt;
+      }
+    }
+
+    case IrOpcode::kCall: {
+      HeapObjectMatcher m(node->InputAt(0));
+      if (m.HasResolvedValue() && m.Ref(broker()).IsCode()) {
+        CodeRef code = m.Ref(broker()).AsCode();
+        if (code.object()->is_builtin()) {
+          Builtin builtin = code.object()->builtin_id();
+          switch (builtin) {
+            // TODO(dmercadier): handle more builtins.
+            case Builtin::kMathRandom:
+              return std::pair{0, 1};
+            default:
+              return base::nullopt;
+          }
+        }
+      }
+      return base::nullopt;
+    }
+
+#define CONST_CASE(op, matcher)                                       \
+  case IrOpcode::k##op: {                                             \
+    matcher m(node);                                                  \
+    if (m.HasResolvedValue()) {                                       \
+      if (m.ResolvedValue() < 0 ||                                    \
+          m.ResolvedValue() >= std::numeric_limits<int32_t>::min()) { \
+        return base::nullopt;                                         \
+      }                                                               \
+      return std::pair{m.ResolvedValue(), m.ResolvedValue()};         \
+    } else {                                                          \
+      return base::nullopt;                                           \
+    }                                                                 \
+  }
+      CONST_CASE(Float32Constant, Float32Matcher)
+      CONST_CASE(Float64Constant, Float64Matcher)
+      CONST_CASE(Int32Constant, Int32Matcher)
+      CONST_CASE(Int64Constant, Int64Matcher)
+      CONST_CASE(NumberConstant, NumberMatcher)
+#undef CONST_CASE
+
+    default:
+      return base::nullopt;
+  }
+}
+
+// Tries to determine whether {node} is a 1-byte or a 2-byte string. This
+// function assumes that {node} is part of a string builder: if it's a
+// concatenation and its left hand-side is something else than a literal string,
+// it returns only whether the right hand-side is 1/2-byte: the String builder
+// analysis will take care of propagating the state of the left hand-side.
+OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::OneOrTwoByte(Node* node) {
+  // TODO(v8:13785,dmercadier): once externalization can no longer convert a
+  // 1-byte into a 2-byte string, compute the proper OneOrTwoByte state.
+  return State::kCantKnow;
+#if 0
+  if (states_[node->id()] != State::kUnknown) {
+    return states_[node->id()];
+  }
+  switch (node->opcode()) {
+    case IrOpcode::kHeapConstant: {
+      HeapObjectMatcher m(node);
+      if (m.HasResolvedValue() && m.Ref(broker()).IsString()) {
+        StringRef string = m.Ref(broker()).AsString();
+        if (string.object()->IsOneByteRepresentation()) {
+          states_[node->id()] = State::kOneByte;
+          return State::kOneByte;
+        } else {
+          DCHECK(string.object()->IsTwoByteRepresentation());
+          states_[node->id()] = State::kTwoByte;
+          return State::kTwoByte;
+        }
+      } else {
+        states_[node->id()] = State::kCantKnow;
+        return State::kCantKnow;
+      }
+    }
+
+    case IrOpcode::kStringFromSingleCharCode: {
+      Node* input = node->InputAt(0);
+      switch (input->opcode()) {
+        case IrOpcode::kStringCharCodeAt: {
+          State state = OneOrTwoByte(input->InputAt(0));
+          states_[node->id()] = state;
+          return state;
+        }
+
+        default: {
+          base::Optional<std::pair<int64_t, int64_t>> range =
+              TryGetRange(input);
+          if (!range.has_value()) {
+            states_[node->id()] = State::kCantKnow;
+            return State::kCantKnow;
+          } else if (range->first >= 0 && range->second < 255) {
+            states_[node->id()] = State::kOneByte;
+            return State::kOneByte;
+          } else {
+            // For values greater than 0xFF, with the current analysis, we have
+            // no way of knowing if the result will be on 1 or 2 bytes. For
+            // instance, `String.fromCharCode(0x120064 & 0xffff)` will
+            // be a 1-byte string, although the analysis will consider that its
+            // range is [0, 0xffff].
+            states_[node->id()] = State::kCantKnow;
+            return State::kCantKnow;
+          }
+        }
+      }
+    }
+
+    case IrOpcode::kStringConcat:
+    case IrOpcode::kNewConsString: {
+      Node* lhs = node->InputAt(1);
+      Node* rhs = node->InputAt(2);
+
+      DCHECK(IsLiteralString(rhs, broker()));
+      State rhs_state = OneOrTwoByte(rhs);
+
+      // OneOrTwoByte is only called for Nodes that are part of a String
+      // Builder. As a result, a StringConcat/NewConsString is either:
+      //  - between 2 string literal if it is the 1st concatenation of the
+      //    string builder.
+      //  - between the beginning of the string builder and a literal string.
+      // Thus, if {lhs} is not a literal string, we ignore its State: the
+      // analysis should already have been done on its predecessors anyways.
+      State lhs_state =
+          IsLiteralString(lhs, broker()) ? OneOrTwoByte(lhs) : rhs_state;
+
+      State node_state = ConcatResultIsOneOrTwoByte(rhs_state, lhs_state);
+      states_[node->id()] = node_state;
+
+      return node_state;
+    }
+
+    default:
+      states_[node->id()] = State::kCantKnow;
+      return State::kCantKnow;
+  }
+#endif
+}
+
+bool StringBuilderOptimizer::BlockShouldFinalizeStringBuilders(
+    BasicBlock* block) {
+  DCHECK_LT(block->id().ToInt(), blocks_to_trimmings_map_.size());
+  return blocks_to_trimmings_map_[block->id().ToInt()].has_value();
+}
+
+ZoneVector<Node*> StringBuilderOptimizer::GetStringBuildersToFinalize(
+    BasicBlock* block) {
+  DCHECK(BlockShouldFinalizeStringBuilders(block));
+  return blocks_to_trimmings_map_[block->id().ToInt()].value();
+}
+
+OneOrTwoByteAnalysis::State StringBuilderOptimizer::GetOneOrTwoByte(
+    Node* node) {
+  DCHECK(ConcatIsInStringBuilder(node));
+  // TODO(v8:13785,dmercadier): once externalization can no longer convert a
+  // 1-byte into a 2-byte string, return the proper OneOrTwoByte status for the
+  // node (= remove the next line and uncomment the 2 after).
+  return OneOrTwoByteAnalysis::State::kCantKnow;
+  // int string_builder_number = GetStringBuilderIdForConcat(node);
+  // return string_builders_[string_builder_number].one_or_two_bytes;
+}
+
+bool StringBuilderOptimizer::IsStringBuilderEnd(Node* node) {
+  Status status = GetStatus(node);
+  DCHECK_IMPLIES(status.state == State::kEndStringBuilder ||
+                     status.state == State::kEndStringBuilderLoopPhi,
+                 status.id != kInvalidId &&
+                     StringBuilderIsValid(string_builders_[status.id]));
+  return status.state == State::kEndStringBuilder ||
+         status.state == State::kEndStringBuilderLoopPhi;
+}
+
+bool StringBuilderOptimizer::IsNonLoopPhiStringBuilderEnd(Node* node) {
+  return IsStringBuilderEnd(node) && !IsLoopPhi(node);
+}
+
+bool StringBuilderOptimizer::IsStringBuilderConcatInput(Node* node) {
+  Status status = GetStatus(node);
+  DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder,
+                 status.id != kInvalidId &&
+                     StringBuilderIsValid(string_builders_[status.id]));
+  return status.state == State::kConfirmedInStringBuilder;
+}
+
+bool StringBuilderOptimizer::ConcatIsInStringBuilder(Node* node) {
+  DCHECK(IsConcat(node));
+  Status status = GetStatus(node);
+  DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder ||
+                     status.state == State::kBeginStringBuilder ||
+                     status.state == State::kEndStringBuilder,
+                 status.id != kInvalidId &&
+                     StringBuilderIsValid(string_builders_[status.id]));
+  return status.state == State::kConfirmedInStringBuilder ||
+         status.state == State::kBeginStringBuilder ||
+         status.state == State::kEndStringBuilder;
+}
+
+int StringBuilderOptimizer::GetStringBuilderIdForConcat(Node* node) {
+  DCHECK(IsConcat(node));
+  Status status = GetStatus(node);
+  DCHECK(status.state == State::kConfirmedInStringBuilder ||
+         status.state == State::kBeginStringBuilder ||
+         status.state == State::kEndStringBuilder);
+  DCHECK_NE(status.id, kInvalidId);
+  return status.id;
+}
+
+bool StringBuilderOptimizer::IsFirstConcatInStringBuilder(Node* node) {
+  if (!ConcatIsInStringBuilder(node)) return false;
+  Status status = GetStatus(node);
+  return status.state == State::kBeginStringBuilder;
+}
+
+// Duplicates the {input_idx}th input of {node} if it has multiple uses, so that
+// the replacement only has one use and can safely be marked as
+// State::kConfirmedInStringBuilder and properly optimized in
+// EffectControlLinearizer (in particular, this will allow to safely remove
+// StringFromSingleCharCode that are only used for a StringConcat that we
+// optimize).
+void StringBuilderOptimizer::ReplaceConcatInputIfNeeded(Node* node,
+                                                        int input_idx) {
+  if (!IsLiteralString(node->InputAt(input_idx), broker())) return;
+  Node* input = node->InputAt(input_idx);
+  DCHECK_EQ(input->op()->EffectOutputCount(), 0);
+  DCHECK_EQ(input->op()->ControlOutputCount(), 0);
+  if (input->UseCount() > 1) {
+    input = graph()->CloneNode(input);
+    node->ReplaceInput(input_idx, input);
+  }
+  Status node_status = GetStatus(node);
+  DCHECK_NE(node_status.id, kInvalidId);
+  SetStatus(input, State::kConfirmedInStringBuilder, node_status.id);
+}
+
+// If all of the predecessors of {node} are part of a string builder and have
+// the same id, returns this id. Otherwise, returns kInvalidId.
+int StringBuilderOptimizer::GetPhiPredecessorsCommonId(Node* node) {
+  DCHECK_EQ(node->opcode(), IrOpcode::kPhi);
+  int id = kInvalidId;
+  for (int i = 0; i < node->op()->ValueInputCount(); i++) {
+    Node* input = NodeProperties::GetValueInput(node, i);
+    Status status = GetStatus(input);
+    switch (status.state) {
+      case State::kBeginStringBuilder:
+      case State::kInStringBuilder:
+      case State::kPendingPhi:
+        if (id == kInvalidId) {
+          // Initializind {id}.
+          id = status.id;
+        } else if (id != status.id) {
+          // 2 inputs belong to different StringBuilder chains.
+          return kInvalidId;
+        }
+        break;
+      case State::kInvalid:
+      case State::kUnvisited:
+        return kInvalidId;
+      default:
+        UNREACHABLE();
+    }
+  }
+  DCHECK_NE(id, kInvalidId);
+  return id;
+}
+
+namespace {
+
+// Returns true if {first} comes before {second} in {block}.
+bool ComesBeforeInBlock(Node* first, Node* second, BasicBlock* block) {
+  for (Node* node : *block->nodes()) {
+    if (node == first) {
+      return true;
+    }
+    if (node == second) {
+      return false;
+    }
+  }
+  UNREACHABLE();
+}
+
+static constexpr int kMaxPredecessors = 15;
+
+// Compute up to {kMaxPredecessors} predecessors of {start} that are not past
+// {end}, and store them in {dst}. Returns true if there are less than
+// {kMaxPredecessors} such predecessors and false otherwise.
+bool ComputePredecessors(
+    BasicBlock* start, BasicBlock* end,
+    base::SmallVector<BasicBlock*, kMaxPredecessors>* dst) {
+  dst->push_back(start);
+  size_t stack_pointer = 0;
+  while (stack_pointer < dst->size()) {
+    BasicBlock* current = (*dst)[stack_pointer++];
+    if (current == end) continue;
+    for (BasicBlock* pred : current->predecessors()) {
+      if (std::find(dst->begin(), dst->end(), pred) == dst->end()) {
+        if (dst->size() == kMaxPredecessors) return false;
+        dst->push_back(pred);
+      }
+    }
+  }
+  return true;
+}
+
+// Returns false if {node} makes its string input escape this use. For instance,
+// a Phi or a Store make their input escape, but a kStringLength consumes its
+// inputs.
+bool OpcodeIsAllowed(IrOpcode::Value op) {
+  switch (op) {
+    case IrOpcode::kStringLength:
+    case IrOpcode::kStringConcat:
+    case IrOpcode::kNewConsString:
+    case IrOpcode::kStringCharCodeAt:
+    case IrOpcode::kStringCodePointAt:
+    case IrOpcode::kStringIndexOf:
+    case IrOpcode::kObjectIsString:
+    case IrOpcode::kStringToLowerCaseIntl:
+    case IrOpcode::kStringToNumber:
+    case IrOpcode::kStringToUpperCaseIntl:
+    case IrOpcode::kStringEqual:
+    case IrOpcode::kStringLessThan:
+    case IrOpcode::kStringLessThanOrEqual:
+    case IrOpcode::kCheckString:
+    case IrOpcode::kTypedStateValues:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Returns true if {sb_child_block} can be a valid successor for
+// {previous_block} in the string builder, considering that {other_child_block}
+// is another successor of {previous_block} (which uses the string builder that
+// is in {previous_block}).We are mainly checking for the following scenario:
+//
+//               |
+//               v
+//       +---> LoopPhi
+//       |       |
+//       |       v
+//       |      node ----------> other_child
+//       |       |
+//       |       v
+//       |     child
+//       |      ...
+//       |       |
+//       +-------+
+//
+// Where {node} and {child} are inside a loop (and could be part of a string
+// builder), but {other_child} is not, and the control flow doesn't exit the
+// loop in between {node} and {child}. The string builder should not be used in
+// such situations, because by the time {other_child} is reached, its input will
+// be invalid, because {child} will have mutated it. (here, node's block would
+// be {previous_block}, child's would be {sb_child_block} and other_child's
+// would be {other_child_block}).
+bool ValidControlFlowForStringBuilder(BasicBlock* sb_child_block,
+                                      BasicBlock* other_child_block,
+                                      BasicBlock* previous_block,
+                                      ZoneVector<BasicBlock*> loop_headers) {
+  if (loop_headers.empty()) return true;
+  // Due to how we visit the graph, {sb_child_block} is the block that
+  // VisitGraph is currently visiting, which means that it has to be in all the
+  // loops of {loop_headers} (and in particular in the latest one).
+  // {other_child_block} on the other hand could be in the loop or not, which is
+  // what this function tries to determine.
+  DCHECK(loop_headers.back()->LoopContains(sb_child_block));
+  if (sb_child_block->IsLoopHeader()) {
+    // {sb_child_block} starts a loop. This is OK for {other_child_block} only
+    // if {other_child_block} is before the loop (because if it's after, then
+    // the value it will receive will be invalid), or if both
+    // {other_child_block} and {previous_block} are inside the loop. The latter
+    // case corresponds to:
+    //
+    //  +--------> sb_child_block
+    //  |         /             \
+    //  |        |               \
+    //  |        v                v
+    //  | previous_block         other_child_block
+    //  |        |
+    //  +--------+
+    //
+    // Where {other_child_block} eventually reaches {previous_block} (or exits
+    // the loop through some other path).
+    return other_child_block->rpo_number() < sb_child_block->rpo_number() ||
+           (sb_child_block->LoopContains(previous_block) &&
+            (sb_child_block->LoopContains(other_child_block)));
+  } else {
+    // Both {sb_child_block} and {other_child_block} should be in the same loop.
+    return loop_headers.back()->LoopContains(other_child_block);
+  }
+}
+
+// Return true if {maybe_dominator} dominates {maybe_dominee} and is less than
+// {kMaxDominatorSteps} steps away (to avoid going back too far if
+// {maybe_dominee} is much deeper in the graph that {maybe_dominator}).
+bool IsClosebyDominator(BasicBlock* maybe_dominator,
+                        BasicBlock* maybe_dominee) {
+  static constexpr int kMaxDominatorSteps = 10;
+  if (maybe_dominee->dominator_depth() + kMaxDominatorSteps <
+      maybe_dominator->dominator_depth()) {
+    // {maybe_dominee} is too far from {maybe_dominator} to compute quickly if
+    // it's dominated by {maybe_dominator} or not.
+    return false;
+  }
+  while (maybe_dominee != maybe_dominator &&
+         maybe_dominator->dominator_depth() <
+             maybe_dominee->dominator_depth()) {
+    maybe_dominee = maybe_dominee->dominator();
+  }
+  return maybe_dominee == maybe_dominator;
+}
+
+// Returns true if {node} is a Phi that has both {input1} and {input2} as
+// inputs.
+bool IsPhiContainingGivenInputs(Node* node, Node* input1, Node* input2,
+                                Schedule* schedule) {
+  if (node->opcode() != IrOpcode::kPhi ||
+      schedule->block(node)->IsLoopHeader()) {
+    return false;
+  }
+  bool has_input1 = false, has_input2 = false;
+  for (Node* input : node->inputs()) {
+    if (input == input1) {
+      has_input1 = true;
+    } else if (input == input2) {
+      has_input2 = true;
+    }
+  }
+  return has_input1 && has_input2;
+}
+
+// Returns true if {phi} has 3 inputs (including the Loop or Merge), and its
+// first two inputs are either Phi themselves, or StringConcat/NewConsString.
+// This is used to quickly eliminate Phi nodes that cannot be part of a String
+// Builder.
+bool PhiInputsAreConcatsOrPhi(Node* phi) {
+  DCHECK_EQ(phi->opcode(), IrOpcode::kPhi);
+  return phi->InputCount() == 3 &&
+         (phi->InputAt(0)->opcode() == IrOpcode::kPhi ||
+          IsConcat(phi->InputAt(0))) &&
+         (phi->InputAt(1)->opcode() == IrOpcode::kPhi ||
+          IsConcat(phi->InputAt(1)));
+}
+
+}  // namespace
+
+// Check that the uses of {node} are valid, assuming that {string_builder_child}
+// is the following node in the string builder. In a nutshell, for uses of a
+// node (that is part of the string builder) to be valid, they need to all
+// appear before the next node of the string builder (because after, the node is
+// not valid anymore because we mutate SlicedString and the backing store in
+// place). For instance:
+//
+//     s1 = "123" + "abc";
+//     s2 = s1 + "def";
+//     l = s1.length();
+//
+// In this snippet, if `s1` and `s2` are part of the string builder, then the
+// uses of `s1` are not actually valid, because `s1.length()` appears after the
+// next node of the string builder (`s2`) has been computed.
+bool StringBuilderOptimizer::CheckNodeUses(Node* node,
+                                           Node* string_builder_child,
+                                           Status status) {
+  DCHECK(GetStatus(string_builder_child).state == State::kInStringBuilder ||
+         GetStatus(string_builder_child).state == State::kPendingPhi);
+  BasicBlock* child_block = schedule()->block(string_builder_child);
+  if (node->UseCount() == 1) return true;
+  BasicBlock* node_block = schedule()->block(node);
+  bool is_loop_phi = IsLoopPhi(node);
+  bool child_is_in_loop =
+      is_loop_phi && LoopContains(node, string_builder_child);
+  base::SmallVector<BasicBlock*, kMaxPredecessors> current_predecessors;
+  bool predecessors_computed = false;
+  for (Node* other_child : node->uses()) {
+    if (other_child == string_builder_child) continue;
+    BasicBlock* other_child_block = schedule()->block(other_child);
+    if (!OpcodeIsAllowed(other_child->opcode())) {
+      // {other_child} could write {node} (the beginning of the string builder)
+      // in memory (or keep it alive through other means, such as a Phi). This
+      // means that if {string_builder_child} modifies the string builder, then
+      // the value stored by {other_child} will become out-dated (since
+      // {other_child} will probably just write a pointer to the string in
+      // memory, and the string pointed by this pointer will be updated by the
+      // string builder).
+      if (is_loop_phi && child_is_in_loop &&
+          !node_block->LoopContains(other_child_block)) {
+        // {other_child} keeps the string alive, but this is only after the
+        // loop, when {string_builder_child} isn't alive anymore, so this isn't
+        // an issue.
+        continue;
+      }
+      return false;
+    }
+    if (other_child_block == child_block) {
+      // Both {child} and {other_child} are in the same block, we need to make
+      // sure that {other_child} comes first.
+      Status other_status = GetStatus(other_child);
+      if (other_status.id != kInvalidId) {
+        DCHECK_EQ(other_status.id, status.id);
+        // {node} flows into 2 different nodes of the string builder, both of
+        // which are in the same BasicBlock, which is not supported. We need to
+        // invalidate {other_child} as well, or the input of {child} could be
+        // wrong. In theory, we could keep one of {other_child} and {child} (the
+        // one that comes the later in the BasicBlock), but it's simpler to keep
+        // neither, and end the string builder on {node}.
+        SetStatus(other_child, State::kInvalid);
+        return false;
+      }
+      if (!ComesBeforeInBlock(other_child, string_builder_child, child_block)) {
+        return false;
+      }
+      continue;
+    }
+    if (is_loop_phi) {
+      if ((child_is_in_loop && !node_block->LoopContains(other_child_block)) ||
+          (!child_is_in_loop && node_block->LoopContains(other_child_block))) {
+        // {child} is in the loop and {other_child} isn't (or the other way
+        // around). In that case, we skip {other_child}: it will be tested
+        // later when we leave the loop (if {child} is in the loop) or has
+        // been tested earlier while we were inside the loop (if {child} isn't
+        // in the loop).
+        continue;
+      }
+    } else if (!ValidControlFlowForStringBuilder(child_block, other_child_block,
+                                                 node_block, loop_headers_)) {
+      return false;
+    }
+
+    if (IsPhiContainingGivenInputs(other_child, node, string_builder_child,
+                                   schedule())) {
+      // {other_child} is a Phi that merges {child} and {node} (and maybe some
+      // other nodes that we don't care about for now: if {other_child} merges
+      // more than 2 nodes, it won't be added to the string builder anyways).
+      continue;
+    }
+
+    base::SmallVector<BasicBlock*, kMaxPredecessors> other_predecessors;
+    bool all_other_predecessors_computed =
+        ComputePredecessors(other_child_block, node_block, &other_predecessors);
+
+    // Making sure that {child_block} isn't in the predecessors of
+    // {other_child_block}. Otherwise, the use of {node} in {other_child}
+    // would be invalid.
+    if (std::find(other_predecessors.begin(), other_predecessors.end(),
+                  child_block) != other_predecessors.end()) {
+      // {child} is in the predecessor of {other_child}, which is definitely
+      // invalid (because it means that {other_child} uses an out-dated version
+      // of {node}, since {child} modified it).
+      return false;
+    } else {
+      if (all_other_predecessors_computed) {
+        // {child} is definitely not in the predecessors of {other_child}, which
+        // means that it's either a successor of {other_child} (which is safe),
+        // or it's in another path of the graph alltogether (which is also
+        // safe).
+        continue;
+      } else {
+        // We didn't compute all the predecessors of {other_child}, so it's
+        // possible that {child_block} is one of the predecessor that we didn't
+        // compute.
+        //
+        // Trying to see if we can find {other_child_block} in the
+        // predecessors of {child_block}: that would mean that {other_child}
+        // is guaranteed to be scheduled before {child}, making it safe.
+        if (!predecessors_computed) {
+          ComputePredecessors(child_block, node_block, &current_predecessors);
+          predecessors_computed = true;
+        }
+        if (std::find(current_predecessors.begin(), current_predecessors.end(),
+                      other_child_block) == current_predecessors.end()) {
+          // We didn't find {other_child} in the predecessors of {child}. It
+          // means that either {other_child} comes after in the graph (which
+          // is unsafe), or that {other_child} and {child} are on two
+          // independent subgraphs (which is safe). We have no efficient way
+          // to know which one of the two this is, so, we fall back to a
+          // stricter approach: the use of {node} in {other_child} is
+          // guaranteed to be safe if {other_child_block} dominates
+          // {child_block}.
+          if (!IsClosebyDominator(other_child_block, child_block)) {
+            return false;
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Check that the uses of the predecessor(s) of {child} in the string builder
+// are valid, with respect to {child}. This sounds a bit backwards, but we can't
+// check if uses are valid before having computed what the next node in the
+// string builder is. Hence, once we've established that {child} is in the
+// string builder, we check that the uses of the previous node(s) of the
+// string builder are valid. For non-loop phis (ie, merge phis), we simply check
+// that the uses of their 2 predecessors are valid. For loop phis, this function
+// is called twice: one for the outside-the-loop input (with {input_if_loop_phi}
+// = 0), and once for the inside-the-loop input (with  {input_if_loop_phi} = 1).
+bool StringBuilderOptimizer::CheckPreviousNodeUses(Node* child, Status status,
+                                                   int input_if_loop_phi) {
+  if (IsConcat(child)) {
+    return CheckNodeUses(child->InputAt(1), child, status);
+  }
+  if (child->opcode() == IrOpcode::kPhi) {
+    BasicBlock* child_block = schedule()->block(child);
+    if (child_block->IsLoopHeader()) {
+      return CheckNodeUses(child->InputAt(input_if_loop_phi), child, status);
+    } else {
+      DCHECK_EQ(child->InputCount(), 3);
+      return CheckNodeUses(child->InputAt(0), child, status) &&
+             CheckNodeUses(child->InputAt(1), child, status);
+    }
+  }
+  UNREACHABLE();
+}
+
+void StringBuilderOptimizer::VisitNode(Node* node, BasicBlock* block) {
+  if (IsConcat(node)) {
+    Node* lhs = node->InputAt(1);
+    Node* rhs = node->InputAt(2);
+
+    if (!IsLiteralString(rhs, broker())) {
+      SetStatus(node, State::kInvalid);
+      return;
+    }
+
+    if (IsLiteralString(lhs, broker())) {
+      // This node could start a string builder. However, we won't know until
+      // we've properly inspected its uses, found a Phi somewhere down its use
+      // chain, made sure that the Phi was valid, etc. Pre-emptively, we do a
+      // quick check (with HasConcatOrPhiUse) that this node has a
+      // StringConcat/NewConsString in its uses, and if so, we set its state as
+      // kBeginConcat, and increment the {string_builder_count_}. The goal of
+      // the HasConcatOrPhiUse is mainly to avoid incrementing
+      // {string_builder_count_} too often for things that are obviously just
+      // regular concatenations of 2 constant strings and that can't be
+      // beginning of string builders.
+      if (HasConcatOrPhiUse(lhs)) {
+        SetStatus(node, State::kBeginStringBuilder, string_builder_count_);
+        string_builders_.push_back(
+            StringBuilder{node, static_cast<int>(string_builder_count_), false,
+                          OneOrTwoByteAnalysis::State::kUnknown});
+        string_builder_count_++;
+      }
+      // A concatenation between 2 literal strings has no predecessor in the
+      // string builder, and there is thus no more checks/bookkeeping required
+      // ==> early return.
+      return;
+    } else {
+      Status lhs_status = GetStatus(lhs);
+      switch (lhs_status.state) {
+        case State::kBeginStringBuilder:
+        case State::kInStringBuilder:
+          SetStatus(node, State::kInStringBuilder, lhs_status.id);
+          break;
+        case State::kPendingPhi: {
+          BasicBlock* phi_block = schedule()->block(lhs);
+          if (phi_block->LoopContains(block)) {
+            // This node uses a PendingPhi and is inside the loop. We
+            // speculatively set it to kInStringBuilder.
+            SetStatus(node, State::kInStringBuilder, lhs_status.id);
+          } else {
+            // This node uses a PendingPhi but is not inside the loop, which
+            // means that the PendingPhi was never resolved to a kInConcat or a
+            // kInvalid, which means that it's actually not valid (because we
+            // visit the graph in RPO order, which means that we've already
+            // visited the whole loop). Thus, we set the Phi to kInvalid, and
+            // thus, we also set the current node to kInvalid.
+            SetStatus(lhs, State::kInvalid);
+            SetStatus(node, State::kInvalid);
+          }
+          break;
+        }
+        case State::kInvalid:
+        case State::kUnvisited:
+          SetStatus(node, State::kInvalid);
+          break;
+        default:
+          UNREACHABLE();
+      }
+    }
+  } else if (node->opcode() == IrOpcode::kPhi &&
+             PhiInputsAreConcatsOrPhi(node)) {
+    if (!block->IsLoopHeader()) {
+      // This Phi merges nodes after a if/else.
+      int id = GetPhiPredecessorsCommonId(node);
+      if (id == kInvalidId) {
+        SetStatus(node, State::kInvalid);
+      } else {
+        SetStatus(node, State::kInStringBuilder, id);
+      }
+    } else {
+      // This Phi merges a value from inside the loop with one from before.
+      DCHECK_EQ(node->op()->ValueInputCount(), 2);
+      Status first_input_status = GetStatus(node->InputAt(0));
+      switch (first_input_status.state) {
+        case State::kBeginStringBuilder:
+        case State::kInStringBuilder:
+          SetStatus(node, State::kPendingPhi, first_input_status.id);
+          break;
+        case State::kPendingPhi:
+        case State::kInvalid:
+        case State::kUnvisited:
+          SetStatus(node, State::kInvalid);
+          break;
+        default:
+          UNREACHABLE();
+      }
+    }
+  } else {
+    SetStatus(node, State::kInvalid);
+  }
+
+  Status status = GetStatus(node);
+  if (status.state == State::kInStringBuilder ||
+      status.state == State::kPendingPhi) {
+    // We make sure that this node being in the string builder doesn't conflict
+    // with other uses of the previous node of the string builder. Note that
+    // loop phis can never have the kInStringBuilder state at this point. We
+    // thus check their uses when we finish the loop and set the phi's status to
+    // InStringBuilder.
+    if (!CheckPreviousNodeUses(node, status, 0)) {
+      SetStatus(node, State::kInvalid);
+      return;
+    }
+    // Updating following PendingPhi if needed.
+    for (Node* use : node->uses()) {
+      if (use->opcode() == IrOpcode::kPhi) {
+        Status use_status = GetStatus(use);
+        if (use_status.state == State::kPendingPhi) {
+          // Finished the loop.
+          SetStatus(use, State::kInStringBuilder, status.id);
+          if (use_status.id == status.id &&
+              CheckPreviousNodeUses(use, status, 1)) {
+            string_builders_[status.id].has_loop_phi = true;
+          } else {
+            // One of the uses of {node} is a pending Phi that hasn't the
+            // correct id (is that even possible?), or the uses of {node} are
+            // invalid. Either way, both {node} and {use} are invalid.
+            SetStatus(node, State::kInvalid);
+            SetStatus(use, State::kInvalid);
+          }
+        }
+      }
+    }
+  }
+}
+
+// For each potential string builder, checks that their beginning has status
+// kBeginStringBuilder, and that they contain at least one phi. Then, all of
+// their "valid" nodes are switched from status State::InStringBuilder to status
+// State::kConfirmedInStringBuilder (and "valid" kBeginStringBuilder are left
+// as kBeginStringBuilder while invalid ones are switched to kInvalid). Nodes
+// are considered "valid" if they are before any kPendingPhi in the string
+// builder. Put otherwise, switching status from kInStringBuilder to
+// kConfirmedInStringBuilder is a cheap way of getting rid of kInStringBuilder
+// nodes that are invalid before one of their predecessor is a kPendingPhi that
+// was never switched to kInStringBuilder. An example:
+//
+//               StringConcat [1]
+//             kBeginStringBuilder
+//                    |
+//                    |
+//                    v
+//          -----> Loop Phi [2] ---------------
+//          |   kInStringBuilder              |
+//          |         |                       |
+//          |         |                       |
+//          |         v                       v
+//          |    StringConcat [3]        StringConcat [4]
+//          |    kInStringBuilder        kInStringBuilder
+//          |         |                       |
+//          ----------|                       |
+//                                            v
+//                                 -----> Loop Phi [5] ------------>
+//                                 |      kPendingPhi
+//                                 |          |
+//                                 |          |
+//                                 |          v
+//                                 |     StringConcat [6]
+//                                 |     kInStringBuilder
+//                                 |          |
+//                                 -----------|
+//
+// In this graph, nodes [1], [2], [3] and [4] are part of the string builder. In
+// particular, node 2 has at some point been assigned the status kPendingPhi
+// (because all loop phis start as kPendingPhi), but was later switched to
+// status kInStringBuilder (because its uses inside the loop were compatible
+// with the string builder), which implicitly made node [3] a valid part of the
+// string builder. On the other hand, node [5] was never switched to status
+// kInStringBuilder, which means that it is not valid, and any successor of [5]
+// isn't valid either (remember that we speculatively set nodes following a
+// kPendingPhi to kInStringBuilder). Thus, rather than having to iterate through
+// the successors of kPendingPhi nodes to invalidate them, we simply update the
+// status of valid nodes to kConfirmedInStringBuilder, after which any
+// kInStringBuilder node is actually invalid.
+//
+// In this function, we also collect all the possible ends for each string
+// builder (their can be multiple possible ends if there is a branch before the
+// end of a string builder), as well as where trimming for a given string
+// builder should be done (either right after the last node, or at the beginning
+// of the blocks following this node). For an example of string builder with
+// multiple ends, consider this code:
+//
+//     let s = "a" + "b"
+//     for (...) {
+//         s += "...";
+//     }
+//     if (...) return s + "abc";
+//     else return s + "def";
+//
+// Which would produce a graph that looks like:
+//
+//                     kStringConcat
+//                            |
+//                            |
+//                            v
+//               -------> Loop Phi---------------
+//               |            |                 |
+//               |            |                 |
+//               |            v                 |
+//               |      kStringConcat           |
+//               |            |                 |
+//               -------------|                 |
+//                                              |
+//                                              |
+//                  ------------------------------------------
+//                  |                                        |
+//                  |                                        |
+//                  |                                        |
+//                  v                                        v
+//            kStringConcat [1]                        kStringConcat [2]
+//                  |                                        |
+//                  |                                        |
+//                  v                                        v
+//               Return                                   Return
+//
+// In this case, both kStringConcat [1] and [2] are valid ends for the string
+// builder.
+void StringBuilderOptimizer::FinalizeStringBuilders() {
+  OneOrTwoByteAnalysis one_or_two_byte_analysis(graph(), temp_zone(), broker());
+
+  // We use {to_visit} to iterate through a string builder, and {ends} to
+  // collect its ending. To save some memory, these 2 variables are declared a
+  // bit early, and we .clear() them at the beginning of each iteration (which
+  // shouldn't free their memory), rather than allocating new memory for each
+  // string builder.
+  ZoneVector<Node*> to_visit(temp_zone());
+  ZoneVector<Node*> ends(temp_zone());
+
+  bool one_string_builder_or_more_valid = false;
+  for (unsigned int string_builder_id = 0;
+       string_builder_id < string_builder_count_; string_builder_id++) {
+    StringBuilder* string_builder = &string_builders_[string_builder_id];
+    Node* start = string_builder->start;
+    Status start_status = GetStatus(start);
+    if (start_status.state != State::kBeginStringBuilder ||
+        !string_builder->has_loop_phi) {
+      // {start} has already been invalidated, or the string builder doesn't
+      // contain a loop Phi.
+      *string_builder = kInvalidStringBuilder;
+      UpdateStatus(start, State::kInvalid);
+      continue;
+    }
+    DCHECK_EQ(start_status.state, State::kBeginStringBuilder);
+    DCHECK_EQ(start_status.id, string_builder_id);
+    one_string_builder_or_more_valid = true;
+
+    OneOrTwoByteAnalysis::State one_or_two_byte =
+        one_or_two_byte_analysis.OneOrTwoByte(start);
+
+    to_visit.clear();
+    ends.clear();
+
+    to_visit.push_back(start);
+    while (!to_visit.empty()) {
+      Node* curr = to_visit.back();
+      to_visit.pop_back();
+
+      Status curr_status = GetStatus(curr);
+      if (curr_status.state == State::kConfirmedInStringBuilder) continue;
+
+      DCHECK(curr_status.state == State::kInStringBuilder ||
+             curr_status.state == State::kBeginStringBuilder);
+      DCHECK_IMPLIES(curr_status.state == State::kBeginStringBuilder,
+                     curr == start);
+      DCHECK_EQ(curr_status.id, start_status.id);
+      if (curr_status.state != State::kBeginStringBuilder) {
+        UpdateStatus(curr, State::kConfirmedInStringBuilder);
+      }
+
+      if (IsConcat(curr)) {
+        one_or_two_byte = OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte(
+            one_or_two_byte, one_or_two_byte_analysis.OneOrTwoByte(curr));
+        // Duplicating string inputs if needed, and marking them as
+        // InStringBuilder (so that EffectControlLinearizer doesn't lower them).
+        ReplaceConcatInputIfNeeded(curr, 1);
+        ReplaceConcatInputIfNeeded(curr, 2);
+      }
+
+      // Check if {curr} is one of the string builder's ends: if {curr} has no
+      // uses that are part of the string builder, then {curr} ends the string
+      // builder.
+      bool has_use_in_string_builder = false;
+      for (Node* next : curr->uses()) {
+        Status next_status = GetStatus(next);
+        if ((next_status.state == State::kInStringBuilder ||
+             next_status.state == State::kConfirmedInStringBuilder) &&
+            next_status.id == curr_status.id) {
+          if (next_status.state == State::kInStringBuilder) {
+            // We only add to {to_visit} when the state is kInStringBuilder to
+            // make sure that we don't revisit already-visited nodes.
+            to_visit.push_back(next);
+          }
+          if (!IsLoopPhi(curr) || !LoopContains(curr, next)) {
+            // The condition above is true when:
+            //  - {curr} is not a loop phi: in that case, {next} is (one of) the
+            //    nodes in the string builder after {curr}.
+            //  - {curr} is a loop phi, and {next} is not inside the loop: in
+            //    that case, {node} is (one of) the nodes in the string builder
+            //    that are after {curr}. Note that we ignore uses of {curr}
+            //    inside the loop, since if {curr} has no uses **after** the
+            //    loop, then it's (one of) the end of the string builder.
+            has_use_in_string_builder = true;
+          }
+        }
+      }
+      if (!has_use_in_string_builder) {
+        ends.push_back(curr);
+      }
+    }
+
+    // Note that there is no need to check that the ends have no conflicting
+    // uses, because none of the ends can be alive at the same time, and thus,
+    // uses of the different ends can't be alive at the same time either. The
+    // reason that ens can't be alive at the same time is that if 2 ends were
+    // alive at the same time, then there exist a node n that is a predecessors
+    // of both ends, and that has 2 successors in the string builder (and alive
+    // at the same time), which is not possible because CheckNodeUses prevents
+    // it.
+
+    // Collecting next blocks where trimming is required (blocks following a
+    // loop Phi where the Phi is the last in a string builder), and setting
+    // kEndStringBuilder state to nodes where trimming should be done right
+    // after computing the node (when the last node in a string builder is not a
+    // loop phi).
+    for (Node* end : ends) {
+      if (IsLoopPhi(end)) {
+        BasicBlock* phi_block = schedule()->block(end);
+        for (BasicBlock* block : phi_block->successors()) {
+          if (phi_block->LoopContains(block)) continue;
+          if (!blocks_to_trimmings_map_[block->id().ToInt()].has_value()) {
+            blocks_to_trimmings_map_[block->id().ToInt()] =
+                ZoneVector<Node*>(temp_zone());
+          }
+          blocks_to_trimmings_map_[block->id().ToInt()]->push_back(end);
+        }
+        UpdateStatus(end, State::kEndStringBuilderLoopPhi);
+      } else {
+        UpdateStatus(end, State::kEndStringBuilder);
+      }
+    }
+
+    string_builder->one_or_two_bytes = one_or_two_byte;
+  }
+
+#ifdef DEBUG
+  if (one_string_builder_or_more_valid) {
+    broker()->isolate()->set_has_turbofan_string_builders();
+  }
+#else
+  USE(one_string_builder_or_more_valid);
+#endif
+}
+
+void StringBuilderOptimizer::VisitGraph() {
+  // Initial discovery of the potential string builders.
+  for (BasicBlock* block : *schedule()->rpo_order()) {
+    // Removing finished loops.
+    while (!loop_headers_.empty() &&
+           loop_headers_.back()->loop_end() == block) {
+      loop_headers_.pop_back();
+    }
+    // Adding new loop if necessary.
+    if (block->IsLoopHeader()) {
+      loop_headers_.push_back(block);
+    }
+    // Visiting block content.
+    for (Node* node : *block->nodes()) {
+      VisitNode(node, block);
+    }
+  }
+
+  // Finalize valid string builders (moving valid nodes to status
+  // kConfirmedInStringBuilder or kEndStringBuilder), and collecting the
+  // trimming points.
+  FinalizeStringBuilders();
+}
+
+void StringBuilderOptimizer::Run() { VisitGraph(); }
+
+StringBuilderOptimizer::StringBuilderOptimizer(JSGraph* jsgraph,
+                                               Schedule* schedule,
+                                               Zone* temp_zone,
+                                               JSHeapBroker* broker)
+    : jsgraph_(jsgraph),
+      schedule_(schedule),
+      temp_zone_(temp_zone),
+      broker_(broker),
+      blocks_to_trimmings_map_(schedule->BasicBlockCount(), temp_zone),
+      status_(jsgraph->graph()->NodeCount(),
+              Status{kInvalidId, State::kUnvisited}, temp_zone),
+      string_builders_(temp_zone),
+      loop_headers_(temp_zone) {}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8