summaryrefslogtreecommitdiff
path: root/deps/v8/src/compiler/string-builder-optimizer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/compiler/string-builder-optimizer.cc')
-rw-r--r--deps/v8/src/compiler/string-builder-optimizer.cc1193
1 files changed, 1193 insertions, 0 deletions
diff --git a/deps/v8/src/compiler/string-builder-optimizer.cc b/deps/v8/src/compiler/string-builder-optimizer.cc
new file mode 100644
index 0000000000..73ad2903b8
--- /dev/null
+++ b/deps/v8/src/compiler/string-builder-optimizer.cc
@@ -0,0 +1,1193 @@
+// Copyright 2022 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/string-builder-optimizer.h"
+
+#include <algorithm>
+
+#include "src/base/bits.h"
+#include "src/base/logging.h"
+#include "src/base/optional.h"
+#include "src/base/small-vector.h"
+#include "src/compiler/access-builder.h"
+#include "src/compiler/graph-assembler.h"
+#include "src/compiler/js-graph.h"
+#include "src/compiler/js-heap-broker.h"
+#include "src/compiler/js-operator.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+#include "src/compiler/node.h"
+#include "src/compiler/opcodes.h"
+#include "src/compiler/operator.h"
+#include "src/compiler/schedule.h"
+#include "src/compiler/types.h"
+#include "src/objects/code.h"
+#include "src/objects/map-inl.h"
+#include "src/utils/utils.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+namespace {
+
+// Returns true if {node} is a kStringConcat or a kNewConsString.
+bool IsConcat(Node* node) {
+ return node->opcode() == IrOpcode::kStringConcat ||
+ node->opcode() == IrOpcode::kNewConsString;
+}
+
+// Returns true if {node} is considered as a literal string by the string
+// builder optimizer:
+// - it's a literal string
+// - or it's a kStringFromSingleCharCode
+bool IsLiteralString(Node* node, JSHeapBroker* broker) {
+ switch (node->opcode()) {
+ case IrOpcode::kHeapConstant: {
+ HeapObjectMatcher m(node);
+ return m.HasResolvedValue() && m.Ref(broker).IsString() &&
+ m.Ref(broker).AsString().IsContentAccessible();
+ }
+ case IrOpcode::kStringFromSingleCharCode:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true if {node} has at least one concatenation or phi in its uses.
+bool HasConcatOrPhiUse(Node* node) {
+ for (Node* use : node->uses()) {
+ if (IsConcat(use) || use->opcode() == IrOpcode::kPhi) {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace
+
+OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte(
+ State a, State b) {
+ DCHECK(a != State::kUnknown && b != State::kUnknown);
+ if (a == State::kOneByte && b == State::kOneByte) {
+ return State::kOneByte;
+ }
+ if (a == State::kTwoByte || b == State::kTwoByte) {
+ return State::kTwoByte;
+ }
+ return State::kCantKnow;
+}
+
+base::Optional<std::pair<int64_t, int64_t>> OneOrTwoByteAnalysis::TryGetRange(
+ Node* node) {
+ switch (node->opcode()) {
+ case IrOpcode::kChangeTaggedToFloat64:
+ case IrOpcode::kTruncateFloat64ToWord32:
+ return TryGetRange(node->InputAt(0));
+
+ case IrOpcode::kInt32Add:
+ case IrOpcode::kInt32AddWithOverflow:
+ case IrOpcode::kInt64Add:
+ case IrOpcode::kInt64AddWithOverflow:
+ case IrOpcode::kFloat32Add:
+ case IrOpcode::kFloat64Add: {
+ base::Optional<std::pair<int64_t, int64_t>> left =
+ TryGetRange(node->InputAt(0));
+ base::Optional<std::pair<int64_t, int64_t>> right =
+ TryGetRange(node->InputAt(1));
+ if (left.has_value() && right.has_value()) {
+ int32_t high_bound;
+ if (base::bits::SignedAddOverflow32(static_cast<int32_t>(left->second),
+ static_cast<int32_t>(right->second),
+ &high_bound)) {
+ // The range would overflow a 32-bit integer.
+ return base::nullopt;
+ }
+ return std::pair{left->first + right->first, high_bound};
+ } else {
+ return base::nullopt;
+ }
+ }
+
+ case IrOpcode::kInt32Sub:
+ case IrOpcode::kInt32SubWithOverflow:
+ case IrOpcode::kInt64Sub:
+ case IrOpcode::kInt64SubWithOverflow:
+ case IrOpcode::kFloat32Sub:
+ case IrOpcode::kFloat64Sub: {
+ base::Optional<std::pair<int64_t, int64_t>> left =
+ TryGetRange(node->InputAt(0));
+ base::Optional<std::pair<int64_t, int64_t>> right =
+ TryGetRange(node->InputAt(1));
+ if (left.has_value() && right.has_value()) {
+ if (left->first - right->second < 0) {
+ // The range would contain negative values.
+ return base::nullopt;
+ }
+ return std::pair{left->first - right->second,
+ left->second - right->first};
+ } else {
+ return base::nullopt;
+ }
+ }
+
+ case IrOpcode::kWord32And:
+ case IrOpcode::kWord64And: {
+ // Note that the minimal value for "a & b" is always 0, regardless of the
+ // max for "a" or "b". And the maximal value is the min of "max of a" and
+ // "max of b".
+ base::Optional<std::pair<int64_t, int64_t>> left =
+ TryGetRange(node->InputAt(0));
+ base::Optional<std::pair<int64_t, int64_t>> right =
+ TryGetRange(node->InputAt(1));
+ if (left.has_value() && right.has_value()) {
+ return std::pair{0, std::min(left->second, right->second)};
+ } else if (left.has_value()) {
+ return std::pair{0, left->second};
+ } else if (right.has_value()) {
+ return std::pair{0, right->second};
+ } else {
+ return base::nullopt;
+ }
+ }
+
+ case IrOpcode::kInt32Mul:
+ case IrOpcode::kInt32MulWithOverflow:
+ case IrOpcode::kInt64Mul:
+ case IrOpcode::kFloat32Mul:
+ case IrOpcode::kFloat64Mul: {
+ base::Optional<std::pair<int64_t, int64_t>> left =
+ TryGetRange(node->InputAt(0));
+ base::Optional<std::pair<int64_t, int64_t>> right =
+ TryGetRange(node->InputAt(1));
+ if (left.has_value() && right.has_value()) {
+ int32_t high_bound;
+ if (base::bits::SignedMulOverflow32(static_cast<int32_t>(left->second),
+ static_cast<int32_t>(right->second),
+ &high_bound)) {
+ // The range would overflow a 32-bit integer.
+ return base::nullopt;
+ }
+ return std::pair{left->first * right->first,
+ left->second * right->second};
+ } else {
+ return base::nullopt;
+ }
+ }
+
+ case IrOpcode::kCall: {
+ HeapObjectMatcher m(node->InputAt(0));
+ if (m.HasResolvedValue() && m.Ref(broker()).IsCode()) {
+ CodeRef code = m.Ref(broker()).AsCode();
+ if (code.object()->is_builtin()) {
+ Builtin builtin = code.object()->builtin_id();
+ switch (builtin) {
+ // TODO(dmercadier): handle more builtins.
+ case Builtin::kMathRandom:
+ return std::pair{0, 1};
+ default:
+ return base::nullopt;
+ }
+ }
+ }
+ return base::nullopt;
+ }
+
+#define CONST_CASE(op, matcher) \
+ case IrOpcode::k##op: { \
+ matcher m(node); \
+ if (m.HasResolvedValue()) { \
+ if (m.ResolvedValue() < 0 || \
+ m.ResolvedValue() >= std::numeric_limits<int32_t>::min()) { \
+ return base::nullopt; \
+ } \
+ return std::pair{m.ResolvedValue(), m.ResolvedValue()}; \
+ } else { \
+ return base::nullopt; \
+ } \
+ }
+ CONST_CASE(Float32Constant, Float32Matcher)
+ CONST_CASE(Float64Constant, Float64Matcher)
+ CONST_CASE(Int32Constant, Int32Matcher)
+ CONST_CASE(Int64Constant, Int64Matcher)
+ CONST_CASE(NumberConstant, NumberMatcher)
+#undef CONST_CASE
+
+ default:
+ return base::nullopt;
+ }
+}
+
+// Tries to determine whether {node} is a 1-byte or a 2-byte string. This
+// function assumes that {node} is part of a string builder: if it's a
+// concatenation and its left hand-side is something else than a literal string,
+// it returns only whether the right hand-side is 1/2-byte: the String builder
+// analysis will take care of propagating the state of the left hand-side.
+OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::OneOrTwoByte(Node* node) {
+ // TODO(v8:13785,dmercadier): once externalization can no longer convert a
+ // 1-byte into a 2-byte string, compute the proper OneOrTwoByte state.
+ return State::kCantKnow;
+#if 0
+ if (states_[node->id()] != State::kUnknown) {
+ return states_[node->id()];
+ }
+ switch (node->opcode()) {
+ case IrOpcode::kHeapConstant: {
+ HeapObjectMatcher m(node);
+ if (m.HasResolvedValue() && m.Ref(broker()).IsString()) {
+ StringRef string = m.Ref(broker()).AsString();
+ if (string.object()->IsOneByteRepresentation()) {
+ states_[node->id()] = State::kOneByte;
+ return State::kOneByte;
+ } else {
+ DCHECK(string.object()->IsTwoByteRepresentation());
+ states_[node->id()] = State::kTwoByte;
+ return State::kTwoByte;
+ }
+ } else {
+ states_[node->id()] = State::kCantKnow;
+ return State::kCantKnow;
+ }
+ }
+
+ case IrOpcode::kStringFromSingleCharCode: {
+ Node* input = node->InputAt(0);
+ switch (input->opcode()) {
+ case IrOpcode::kStringCharCodeAt: {
+ State state = OneOrTwoByte(input->InputAt(0));
+ states_[node->id()] = state;
+ return state;
+ }
+
+ default: {
+ base::Optional<std::pair<int64_t, int64_t>> range =
+ TryGetRange(input);
+ if (!range.has_value()) {
+ states_[node->id()] = State::kCantKnow;
+ return State::kCantKnow;
+ } else if (range->first >= 0 && range->second < 255) {
+ states_[node->id()] = State::kOneByte;
+ return State::kOneByte;
+ } else {
+ // For values greater than 0xFF, with the current analysis, we have
+ // no way of knowing if the result will be on 1 or 2 bytes. For
+ // instance, `String.fromCharCode(0x120064 & 0xffff)` will
+ // be a 1-byte string, although the analysis will consider that its
+ // range is [0, 0xffff].
+ states_[node->id()] = State::kCantKnow;
+ return State::kCantKnow;
+ }
+ }
+ }
+ }
+
+ case IrOpcode::kStringConcat:
+ case IrOpcode::kNewConsString: {
+ Node* lhs = node->InputAt(1);
+ Node* rhs = node->InputAt(2);
+
+ DCHECK(IsLiteralString(rhs, broker()));
+ State rhs_state = OneOrTwoByte(rhs);
+
+ // OneOrTwoByte is only called for Nodes that are part of a String
+ // Builder. As a result, a StringConcat/NewConsString is either:
+ // - between 2 string literal if it is the 1st concatenation of the
+ // string builder.
+ // - between the beginning of the string builder and a literal string.
+ // Thus, if {lhs} is not a literal string, we ignore its State: the
+ // analysis should already have been done on its predecessors anyways.
+ State lhs_state =
+ IsLiteralString(lhs, broker()) ? OneOrTwoByte(lhs) : rhs_state;
+
+ State node_state = ConcatResultIsOneOrTwoByte(rhs_state, lhs_state);
+ states_[node->id()] = node_state;
+
+ return node_state;
+ }
+
+ default:
+ states_[node->id()] = State::kCantKnow;
+ return State::kCantKnow;
+ }
+#endif
+}
+
+bool StringBuilderOptimizer::BlockShouldFinalizeStringBuilders(
+ BasicBlock* block) {
+ DCHECK_LT(block->id().ToInt(), blocks_to_trimmings_map_.size());
+ return blocks_to_trimmings_map_[block->id().ToInt()].has_value();
+}
+
+ZoneVector<Node*> StringBuilderOptimizer::GetStringBuildersToFinalize(
+ BasicBlock* block) {
+ DCHECK(BlockShouldFinalizeStringBuilders(block));
+ return blocks_to_trimmings_map_[block->id().ToInt()].value();
+}
+
+OneOrTwoByteAnalysis::State StringBuilderOptimizer::GetOneOrTwoByte(
+ Node* node) {
+ DCHECK(ConcatIsInStringBuilder(node));
+ // TODO(v8:13785,dmercadier): once externalization can no longer convert a
+ // 1-byte into a 2-byte string, return the proper OneOrTwoByte status for the
+ // node (= remove the next line and uncomment the 2 after).
+ return OneOrTwoByteAnalysis::State::kCantKnow;
+ // int string_builder_number = GetStringBuilderIdForConcat(node);
+ // return string_builders_[string_builder_number].one_or_two_bytes;
+}
+
+bool StringBuilderOptimizer::IsStringBuilderEnd(Node* node) {
+ Status status = GetStatus(node);
+ DCHECK_IMPLIES(status.state == State::kEndStringBuilder ||
+ status.state == State::kEndStringBuilderLoopPhi,
+ status.id != kInvalidId &&
+ StringBuilderIsValid(string_builders_[status.id]));
+ return status.state == State::kEndStringBuilder ||
+ status.state == State::kEndStringBuilderLoopPhi;
+}
+
+bool StringBuilderOptimizer::IsNonLoopPhiStringBuilderEnd(Node* node) {
+ return IsStringBuilderEnd(node) && !IsLoopPhi(node);
+}
+
+bool StringBuilderOptimizer::IsStringBuilderConcatInput(Node* node) {
+ Status status = GetStatus(node);
+ DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder,
+ status.id != kInvalidId &&
+ StringBuilderIsValid(string_builders_[status.id]));
+ return status.state == State::kConfirmedInStringBuilder;
+}
+
+bool StringBuilderOptimizer::ConcatIsInStringBuilder(Node* node) {
+ DCHECK(IsConcat(node));
+ Status status = GetStatus(node);
+ DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder ||
+ status.state == State::kBeginStringBuilder ||
+ status.state == State::kEndStringBuilder,
+ status.id != kInvalidId &&
+ StringBuilderIsValid(string_builders_[status.id]));
+ return status.state == State::kConfirmedInStringBuilder ||
+ status.state == State::kBeginStringBuilder ||
+ status.state == State::kEndStringBuilder;
+}
+
+int StringBuilderOptimizer::GetStringBuilderIdForConcat(Node* node) {
+ DCHECK(IsConcat(node));
+ Status status = GetStatus(node);
+ DCHECK(status.state == State::kConfirmedInStringBuilder ||
+ status.state == State::kBeginStringBuilder ||
+ status.state == State::kEndStringBuilder);
+ DCHECK_NE(status.id, kInvalidId);
+ return status.id;
+}
+
+bool StringBuilderOptimizer::IsFirstConcatInStringBuilder(Node* node) {
+ if (!ConcatIsInStringBuilder(node)) return false;
+ Status status = GetStatus(node);
+ return status.state == State::kBeginStringBuilder;
+}
+
+// Duplicates the {input_idx}th input of {node} if it has multiple uses, so that
+// the replacement only has one use and can safely be marked as
+// State::kConfirmedInStringBuilder and properly optimized in
+// EffectControlLinearizer (in particular, this will allow to safely remove
+// StringFromSingleCharCode that are only used for a StringConcat that we
+// optimize).
+void StringBuilderOptimizer::ReplaceConcatInputIfNeeded(Node* node,
+ int input_idx) {
+ if (!IsLiteralString(node->InputAt(input_idx), broker())) return;
+ Node* input = node->InputAt(input_idx);
+ DCHECK_EQ(input->op()->EffectOutputCount(), 0);
+ DCHECK_EQ(input->op()->ControlOutputCount(), 0);
+ if (input->UseCount() > 1) {
+ input = graph()->CloneNode(input);
+ node->ReplaceInput(input_idx, input);
+ }
+ Status node_status = GetStatus(node);
+ DCHECK_NE(node_status.id, kInvalidId);
+ SetStatus(input, State::kConfirmedInStringBuilder, node_status.id);
+}
+
+// If all of the predecessors of {node} are part of a string builder and have
+// the same id, returns this id. Otherwise, returns kInvalidId.
+int StringBuilderOptimizer::GetPhiPredecessorsCommonId(Node* node) {
+ DCHECK_EQ(node->opcode(), IrOpcode::kPhi);
+ int id = kInvalidId;
+ for (int i = 0; i < node->op()->ValueInputCount(); i++) {
+ Node* input = NodeProperties::GetValueInput(node, i);
+ Status status = GetStatus(input);
+ switch (status.state) {
+ case State::kBeginStringBuilder:
+ case State::kInStringBuilder:
+ case State::kPendingPhi:
+ if (id == kInvalidId) {
+ // Initializind {id}.
+ id = status.id;
+ } else if (id != status.id) {
+ // 2 inputs belong to different StringBuilder chains.
+ return kInvalidId;
+ }
+ break;
+ case State::kInvalid:
+ case State::kUnvisited:
+ return kInvalidId;
+ default:
+ UNREACHABLE();
+ }
+ }
+ DCHECK_NE(id, kInvalidId);
+ return id;
+}
+
+namespace {
+
+// Returns true if {first} comes before {second} in {block}.
+bool ComesBeforeInBlock(Node* first, Node* second, BasicBlock* block) {
+ for (Node* node : *block->nodes()) {
+ if (node == first) {
+ return true;
+ }
+ if (node == second) {
+ return false;
+ }
+ }
+ UNREACHABLE();
+}
+
+static constexpr int kMaxPredecessors = 15;
+
+// Compute up to {kMaxPredecessors} predecessors of {start} that are not past
+// {end}, and store them in {dst}. Returns true if there are less than
+// {kMaxPredecessors} such predecessors and false otherwise.
+bool ComputePredecessors(
+ BasicBlock* start, BasicBlock* end,
+ base::SmallVector<BasicBlock*, kMaxPredecessors>* dst) {
+ dst->push_back(start);
+ size_t stack_pointer = 0;
+ while (stack_pointer < dst->size()) {
+ BasicBlock* current = (*dst)[stack_pointer++];
+ if (current == end) continue;
+ for (BasicBlock* pred : current->predecessors()) {
+ if (std::find(dst->begin(), dst->end(), pred) == dst->end()) {
+ if (dst->size() == kMaxPredecessors) return false;
+ dst->push_back(pred);
+ }
+ }
+ }
+ return true;
+}
+
+// Returns false if {node} makes its string input escape this use. For instance,
+// a Phi or a Store make their input escape, but a kStringLength consumes its
+// inputs.
+bool OpcodeIsAllowed(IrOpcode::Value op) {
+ switch (op) {
+ case IrOpcode::kStringLength:
+ case IrOpcode::kStringConcat:
+ case IrOpcode::kNewConsString:
+ case IrOpcode::kStringCharCodeAt:
+ case IrOpcode::kStringCodePointAt:
+ case IrOpcode::kStringIndexOf:
+ case IrOpcode::kObjectIsString:
+ case IrOpcode::kStringToLowerCaseIntl:
+ case IrOpcode::kStringToNumber:
+ case IrOpcode::kStringToUpperCaseIntl:
+ case IrOpcode::kStringEqual:
+ case IrOpcode::kStringLessThan:
+ case IrOpcode::kStringLessThanOrEqual:
+ case IrOpcode::kCheckString:
+ case IrOpcode::kTypedStateValues:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true if {sb_child_block} can be a valid successor for
+// {previous_block} in the string builder, considering that {other_child_block}
+// is another successor of {previous_block} (which uses the string builder that
+// is in {previous_block}).We are mainly checking for the following scenario:
+//
+// |
+// v
+// +---> LoopPhi
+// | |
+// | v
+// | node ----------> other_child
+// | |
+// | v
+// | child
+// | ...
+// | |
+// +-------+
+//
+// Where {node} and {child} are inside a loop (and could be part of a string
+// builder), but {other_child} is not, and the control flow doesn't exit the
+// loop in between {node} and {child}. The string builder should not be used in
+// such situations, because by the time {other_child} is reached, its input will
+// be invalid, because {child} will have mutated it. (here, node's block would
+// be {previous_block}, child's would be {sb_child_block} and other_child's
+// would be {other_child_block}).
+bool ValidControlFlowForStringBuilder(BasicBlock* sb_child_block,
+ BasicBlock* other_child_block,
+ BasicBlock* previous_block,
+ ZoneVector<BasicBlock*> loop_headers) {
+ if (loop_headers.empty()) return true;
+ // Due to how we visit the graph, {sb_child_block} is the block that
+ // VisitGraph is currently visiting, which means that it has to be in all the
+ // loops of {loop_headers} (and in particular in the latest one).
+ // {other_child_block} on the other hand could be in the loop or not, which is
+ // what this function tries to determine.
+ DCHECK(loop_headers.back()->LoopContains(sb_child_block));
+ if (sb_child_block->IsLoopHeader()) {
+ // {sb_child_block} starts a loop. This is OK for {other_child_block} only
+ // if {other_child_block} is before the loop (because if it's after, then
+ // the value it will receive will be invalid), or if both
+ // {other_child_block} and {previous_block} are inside the loop. The latter
+ // case corresponds to:
+ //
+ // +--------> sb_child_block
+ // | / \
+ // | | \
+ // | v v
+ // | previous_block other_child_block
+ // | |
+ // +--------+
+ //
+ // Where {other_child_block} eventually reaches {previous_block} (or exits
+ // the loop through some other path).
+ return other_child_block->rpo_number() < sb_child_block->rpo_number() ||
+ (sb_child_block->LoopContains(previous_block) &&
+ (sb_child_block->LoopContains(other_child_block)));
+ } else {
+ // Both {sb_child_block} and {other_child_block} should be in the same loop.
+ return loop_headers.back()->LoopContains(other_child_block);
+ }
+}
+
+// Return true if {maybe_dominator} dominates {maybe_dominee} and is less than
+// {kMaxDominatorSteps} steps away (to avoid going back too far if
+// {maybe_dominee} is much deeper in the graph that {maybe_dominator}).
+bool IsClosebyDominator(BasicBlock* maybe_dominator,
+ BasicBlock* maybe_dominee) {
+ static constexpr int kMaxDominatorSteps = 10;
+ if (maybe_dominee->dominator_depth() + kMaxDominatorSteps <
+ maybe_dominator->dominator_depth()) {
+ // {maybe_dominee} is too far from {maybe_dominator} to compute quickly if
+ // it's dominated by {maybe_dominator} or not.
+ return false;
+ }
+ while (maybe_dominee != maybe_dominator &&
+ maybe_dominator->dominator_depth() <
+ maybe_dominee->dominator_depth()) {
+ maybe_dominee = maybe_dominee->dominator();
+ }
+ return maybe_dominee == maybe_dominator;
+}
+
+// Returns true if {node} is a Phi that has both {input1} and {input2} as
+// inputs.
+bool IsPhiContainingGivenInputs(Node* node, Node* input1, Node* input2,
+ Schedule* schedule) {
+ if (node->opcode() != IrOpcode::kPhi ||
+ schedule->block(node)->IsLoopHeader()) {
+ return false;
+ }
+ bool has_input1 = false, has_input2 = false;
+ for (Node* input : node->inputs()) {
+ if (input == input1) {
+ has_input1 = true;
+ } else if (input == input2) {
+ has_input2 = true;
+ }
+ }
+ return has_input1 && has_input2;
+}
+
+// Returns true if {phi} has 3 inputs (including the Loop or Merge), and its
+// first two inputs are either Phi themselves, or StringConcat/NewConsString.
+// This is used to quickly eliminate Phi nodes that cannot be part of a String
+// Builder.
+bool PhiInputsAreConcatsOrPhi(Node* phi) {
+ DCHECK_EQ(phi->opcode(), IrOpcode::kPhi);
+ return phi->InputCount() == 3 &&
+ (phi->InputAt(0)->opcode() == IrOpcode::kPhi ||
+ IsConcat(phi->InputAt(0))) &&
+ (phi->InputAt(1)->opcode() == IrOpcode::kPhi ||
+ IsConcat(phi->InputAt(1)));
+}
+
+} // namespace
+
+// Check that the uses of {node} are valid, assuming that {string_builder_child}
+// is the following node in the string builder. In a nutshell, for uses of a
+// node (that is part of the string builder) to be valid, they need to all
+// appear before the next node of the string builder (because after, the node is
+// not valid anymore because we mutate SlicedString and the backing store in
+// place). For instance:
+//
+// s1 = "123" + "abc";
+// s2 = s1 + "def";
+// l = s1.length();
+//
+// In this snippet, if `s1` and `s2` are part of the string builder, then the
+// uses of `s1` are not actually valid, because `s1.length()` appears after the
+// next node of the string builder (`s2`) has been computed.
+bool StringBuilderOptimizer::CheckNodeUses(Node* node,
+ Node* string_builder_child,
+ Status status) {
+ DCHECK(GetStatus(string_builder_child).state == State::kInStringBuilder ||
+ GetStatus(string_builder_child).state == State::kPendingPhi);
+ BasicBlock* child_block = schedule()->block(string_builder_child);
+ if (node->UseCount() == 1) return true;
+ BasicBlock* node_block = schedule()->block(node);
+ bool is_loop_phi = IsLoopPhi(node);
+ bool child_is_in_loop =
+ is_loop_phi && LoopContains(node, string_builder_child);
+ base::SmallVector<BasicBlock*, kMaxPredecessors> current_predecessors;
+ bool predecessors_computed = false;
+ for (Node* other_child : node->uses()) {
+ if (other_child == string_builder_child) continue;
+ BasicBlock* other_child_block = schedule()->block(other_child);
+ if (!OpcodeIsAllowed(other_child->opcode())) {
+ // {other_child} could write {node} (the beginning of the string builder)
+ // in memory (or keep it alive through other means, such as a Phi). This
+ // means that if {string_builder_child} modifies the string builder, then
+ // the value stored by {other_child} will become out-dated (since
+ // {other_child} will probably just write a pointer to the string in
+ // memory, and the string pointed by this pointer will be updated by the
+ // string builder).
+ if (is_loop_phi && child_is_in_loop &&
+ !node_block->LoopContains(other_child_block)) {
+ // {other_child} keeps the string alive, but this is only after the
+ // loop, when {string_builder_child} isn't alive anymore, so this isn't
+ // an issue.
+ continue;
+ }
+ return false;
+ }
+ if (other_child_block == child_block) {
+ // Both {child} and {other_child} are in the same block, we need to make
+ // sure that {other_child} comes first.
+ Status other_status = GetStatus(other_child);
+ if (other_status.id != kInvalidId) {
+ DCHECK_EQ(other_status.id, status.id);
+ // {node} flows into 2 different nodes of the string builder, both of
+ // which are in the same BasicBlock, which is not supported. We need to
+ // invalidate {other_child} as well, or the input of {child} could be
+ // wrong. In theory, we could keep one of {other_child} and {child} (the
+ // one that comes the later in the BasicBlock), but it's simpler to keep
+ // neither, and end the string builder on {node}.
+ SetStatus(other_child, State::kInvalid);
+ return false;
+ }
+ if (!ComesBeforeInBlock(other_child, string_builder_child, child_block)) {
+ return false;
+ }
+ continue;
+ }
+ if (is_loop_phi) {
+ if ((child_is_in_loop && !node_block->LoopContains(other_child_block)) ||
+ (!child_is_in_loop && node_block->LoopContains(other_child_block))) {
+ // {child} is in the loop and {other_child} isn't (or the other way
+ // around). In that case, we skip {other_child}: it will be tested
+ // later when we leave the loop (if {child} is in the loop) or has
+ // been tested earlier while we were inside the loop (if {child} isn't
+ // in the loop).
+ continue;
+ }
+ } else if (!ValidControlFlowForStringBuilder(child_block, other_child_block,
+ node_block, loop_headers_)) {
+ return false;
+ }
+
+ if (IsPhiContainingGivenInputs(other_child, node, string_builder_child,
+ schedule())) {
+ // {other_child} is a Phi that merges {child} and {node} (and maybe some
+ // other nodes that we don't care about for now: if {other_child} merges
+ // more than 2 nodes, it won't be added to the string builder anyways).
+ continue;
+ }
+
+ base::SmallVector<BasicBlock*, kMaxPredecessors> other_predecessors;
+ bool all_other_predecessors_computed =
+ ComputePredecessors(other_child_block, node_block, &other_predecessors);
+
+ // Making sure that {child_block} isn't in the predecessors of
+ // {other_child_block}. Otherwise, the use of {node} in {other_child}
+ // would be invalid.
+ if (std::find(other_predecessors.begin(), other_predecessors.end(),
+ child_block) != other_predecessors.end()) {
+ // {child} is in the predecessor of {other_child}, which is definitely
+ // invalid (because it means that {other_child} uses an out-dated version
+ // of {node}, since {child} modified it).
+ return false;
+ } else {
+ if (all_other_predecessors_computed) {
+ // {child} is definitely not in the predecessors of {other_child}, which
+ // means that it's either a successor of {other_child} (which is safe),
+ // or it's in another path of the graph alltogether (which is also
+ // safe).
+ continue;
+ } else {
+ // We didn't compute all the predecessors of {other_child}, so it's
+ // possible that {child_block} is one of the predecessor that we didn't
+ // compute.
+ //
+ // Trying to see if we can find {other_child_block} in the
+ // predecessors of {child_block}: that would mean that {other_child}
+ // is guaranteed to be scheduled before {child}, making it safe.
+ if (!predecessors_computed) {
+ ComputePredecessors(child_block, node_block, &current_predecessors);
+ predecessors_computed = true;
+ }
+ if (std::find(current_predecessors.begin(), current_predecessors.end(),
+ other_child_block) == current_predecessors.end()) {
+ // We didn't find {other_child} in the predecessors of {child}. It
+ // means that either {other_child} comes after in the graph (which
+ // is unsafe), or that {other_child} and {child} are on two
+ // independent subgraphs (which is safe). We have no efficient way
+ // to know which one of the two this is, so, we fall back to a
+ // stricter approach: the use of {node} in {other_child} is
+ // guaranteed to be safe if {other_child_block} dominates
+ // {child_block}.
+ if (!IsClosebyDominator(other_child_block, child_block)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+// Check that the uses of the predecessor(s) of {child} in the string builder
+// are valid, with respect to {child}. This sounds a bit backwards, but we can't
+// check if uses are valid before having computed what the next node in the
+// string builder is. Hence, once we've established that {child} is in the
+// string builder, we check that the uses of the previous node(s) of the
+// string builder are valid. For non-loop phis (ie, merge phis), we simply check
+// that the uses of their 2 predecessors are valid. For loop phis, this function
+// is called twice: one for the outside-the-loop input (with {input_if_loop_phi}
+// = 0), and once for the inside-the-loop input (with {input_if_loop_phi} = 1).
+bool StringBuilderOptimizer::CheckPreviousNodeUses(Node* child, Status status,
+ int input_if_loop_phi) {
+ if (IsConcat(child)) {
+ return CheckNodeUses(child->InputAt(1), child, status);
+ }
+ if (child->opcode() == IrOpcode::kPhi) {
+ BasicBlock* child_block = schedule()->block(child);
+ if (child_block->IsLoopHeader()) {
+ return CheckNodeUses(child->InputAt(input_if_loop_phi), child, status);
+ } else {
+ DCHECK_EQ(child->InputCount(), 3);
+ return CheckNodeUses(child->InputAt(0), child, status) &&
+ CheckNodeUses(child->InputAt(1), child, status);
+ }
+ }
+ UNREACHABLE();
+}
+
+void StringBuilderOptimizer::VisitNode(Node* node, BasicBlock* block) {
+ if (IsConcat(node)) {
+ Node* lhs = node->InputAt(1);
+ Node* rhs = node->InputAt(2);
+
+ if (!IsLiteralString(rhs, broker())) {
+ SetStatus(node, State::kInvalid);
+ return;
+ }
+
+ if (IsLiteralString(lhs, broker())) {
+ // This node could start a string builder. However, we won't know until
+ // we've properly inspected its uses, found a Phi somewhere down its use
+ // chain, made sure that the Phi was valid, etc. Pre-emptively, we do a
+ // quick check (with HasConcatOrPhiUse) that this node has a
+ // StringConcat/NewConsString in its uses, and if so, we set its state as
+ // kBeginConcat, and increment the {string_builder_count_}. The goal of
+ // the HasConcatOrPhiUse is mainly to avoid incrementing
+ // {string_builder_count_} too often for things that are obviously just
+ // regular concatenations of 2 constant strings and that can't be
+ // beginning of string builders.
+ if (HasConcatOrPhiUse(lhs)) {
+ SetStatus(node, State::kBeginStringBuilder, string_builder_count_);
+ string_builders_.push_back(
+ StringBuilder{node, static_cast<int>(string_builder_count_), false,
+ OneOrTwoByteAnalysis::State::kUnknown});
+ string_builder_count_++;
+ }
+ // A concatenation between 2 literal strings has no predecessor in the
+ // string builder, and there is thus no more checks/bookkeeping required
+ // ==> early return.
+ return;
+ } else {
+ Status lhs_status = GetStatus(lhs);
+ switch (lhs_status.state) {
+ case State::kBeginStringBuilder:
+ case State::kInStringBuilder:
+ SetStatus(node, State::kInStringBuilder, lhs_status.id);
+ break;
+ case State::kPendingPhi: {
+ BasicBlock* phi_block = schedule()->block(lhs);
+ if (phi_block->LoopContains(block)) {
+ // This node uses a PendingPhi and is inside the loop. We
+ // speculatively set it to kInStringBuilder.
+ SetStatus(node, State::kInStringBuilder, lhs_status.id);
+ } else {
+ // This node uses a PendingPhi but is not inside the loop, which
+ // means that the PendingPhi was never resolved to a kInConcat or a
+ // kInvalid, which means that it's actually not valid (because we
+ // visit the graph in RPO order, which means that we've already
+ // visited the whole loop). Thus, we set the Phi to kInvalid, and
+ // thus, we also set the current node to kInvalid.
+ SetStatus(lhs, State::kInvalid);
+ SetStatus(node, State::kInvalid);
+ }
+ break;
+ }
+ case State::kInvalid:
+ case State::kUnvisited:
+ SetStatus(node, State::kInvalid);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ }
+ } else if (node->opcode() == IrOpcode::kPhi &&
+ PhiInputsAreConcatsOrPhi(node)) {
+ if (!block->IsLoopHeader()) {
+ // This Phi merges nodes after a if/else.
+ int id = GetPhiPredecessorsCommonId(node);
+ if (id == kInvalidId) {
+ SetStatus(node, State::kInvalid);
+ } else {
+ SetStatus(node, State::kInStringBuilder, id);
+ }
+ } else {
+ // This Phi merges a value from inside the loop with one from before.
+ DCHECK_EQ(node->op()->ValueInputCount(), 2);
+ Status first_input_status = GetStatus(node->InputAt(0));
+ switch (first_input_status.state) {
+ case State::kBeginStringBuilder:
+ case State::kInStringBuilder:
+ SetStatus(node, State::kPendingPhi, first_input_status.id);
+ break;
+ case State::kPendingPhi:
+ case State::kInvalid:
+ case State::kUnvisited:
+ SetStatus(node, State::kInvalid);
+ break;
+ default:
+ UNREACHABLE();
+ }
+ }
+ } else {
+ SetStatus(node, State::kInvalid);
+ }
+
+ Status status = GetStatus(node);
+ if (status.state == State::kInStringBuilder ||
+ status.state == State::kPendingPhi) {
+ // We make sure that this node being in the string builder doesn't conflict
+ // with other uses of the previous node of the string builder. Note that
+ // loop phis can never have the kInStringBuilder state at this point. We
+ // thus check their uses when we finish the loop and set the phi's status to
+ // InStringBuilder.
+ if (!CheckPreviousNodeUses(node, status, 0)) {
+ SetStatus(node, State::kInvalid);
+ return;
+ }
+ // Updating following PendingPhi if needed.
+ for (Node* use : node->uses()) {
+ if (use->opcode() == IrOpcode::kPhi) {
+ Status use_status = GetStatus(use);
+ if (use_status.state == State::kPendingPhi) {
+ // Finished the loop.
+ SetStatus(use, State::kInStringBuilder, status.id);
+ if (use_status.id == status.id &&
+ CheckPreviousNodeUses(use, status, 1)) {
+ string_builders_[status.id].has_loop_phi = true;
+ } else {
+ // One of the uses of {node} is a pending Phi that hasn't the
+ // correct id (is that even possible?), or the uses of {node} are
+ // invalid. Either way, both {node} and {use} are invalid.
+ SetStatus(node, State::kInvalid);
+ SetStatus(use, State::kInvalid);
+ }
+ }
+ }
+ }
+ }
+}
+
+// For each potential string builder, checks that their beginning has status
+// kBeginStringBuilder, and that they contain at least one phi. Then, all of
+// their "valid" nodes are switched from status State::InStringBuilder to status
+// State::kConfirmedInStringBuilder (and "valid" kBeginStringBuilder are left
+// as kBeginStringBuilder while invalid ones are switched to kInvalid). Nodes
+// are considered "valid" if they are before any kPendingPhi in the string
+// builder. Put otherwise, switching status from kInStringBuilder to
+// kConfirmedInStringBuilder is a cheap way of getting rid of kInStringBuilder
+// nodes that are invalid before one of their predecessor is a kPendingPhi that
+// was never switched to kInStringBuilder. An example:
+//
+// StringConcat [1]
+// kBeginStringBuilder
+// |
+// |
+// v
+// -----> Loop Phi [2] ---------------
+// | kInStringBuilder |
+// | | |
+// | | |
+// | v v
+// | StringConcat [3] StringConcat [4]
+// | kInStringBuilder kInStringBuilder
+// | | |
+// ----------| |
+// v
+// -----> Loop Phi [5] ------------>
+// | kPendingPhi
+// | |
+// | |
+// | v
+// | StringConcat [6]
+// | kInStringBuilder
+// | |
+// -----------|
+//
+// In this graph, nodes [1], [2], [3] and [4] are part of the string builder. In
+// particular, node 2 has at some point been assigned the status kPendingPhi
+// (because all loop phis start as kPendingPhi), but was later switched to
+// status kInStringBuilder (because its uses inside the loop were compatible
+// with the string builder), which implicitly made node [3] a valid part of the
+// string builder. On the other hand, node [5] was never switched to status
+// kInStringBuilder, which means that it is not valid, and any successor of [5]
+// isn't valid either (remember that we speculatively set nodes following a
+// kPendingPhi to kInStringBuilder). Thus, rather than having to iterate through
+// the successors of kPendingPhi nodes to invalidate them, we simply update the
+// status of valid nodes to kConfirmedInStringBuilder, after which any
+// kInStringBuilder node is actually invalid.
+//
+// In this function, we also collect all the possible ends for each string
+// builder (their can be multiple possible ends if there is a branch before the
+// end of a string builder), as well as where trimming for a given string
+// builder should be done (either right after the last node, or at the beginning
+// of the blocks following this node). For an example of string builder with
+// multiple ends, consider this code:
+//
+// let s = "a" + "b"
+// for (...) {
+// s += "...";
+// }
+// if (...) return s + "abc";
+// else return s + "def";
+//
+// Which would produce a graph that looks like:
+//
+// kStringConcat
+// |
+// |
+// v
+// -------> Loop Phi---------------
+// | | |
+// | | |
+// | v |
+// | kStringConcat |
+// | | |
+// -------------| |
+// |
+// |
+// ------------------------------------------
+// | |
+// | |
+// | |
+// v v
+// kStringConcat [1] kStringConcat [2]
+// | |
+// | |
+// v v
+// Return Return
+//
+// In this case, both kStringConcat [1] and [2] are valid ends for the string
+// builder.
+void StringBuilderOptimizer::FinalizeStringBuilders() {
+ OneOrTwoByteAnalysis one_or_two_byte_analysis(graph(), temp_zone(), broker());
+
+ // We use {to_visit} to iterate through a string builder, and {ends} to
+ // collect its ending. To save some memory, these 2 variables are declared a
+ // bit early, and we .clear() them at the beginning of each iteration (which
+ // shouldn't free their memory), rather than allocating new memory for each
+ // string builder.
+ ZoneVector<Node*> to_visit(temp_zone());
+ ZoneVector<Node*> ends(temp_zone());
+
+ bool one_string_builder_or_more_valid = false;
+ for (unsigned int string_builder_id = 0;
+ string_builder_id < string_builder_count_; string_builder_id++) {
+ StringBuilder* string_builder = &string_builders_[string_builder_id];
+ Node* start = string_builder->start;
+ Status start_status = GetStatus(start);
+ if (start_status.state != State::kBeginStringBuilder ||
+ !string_builder->has_loop_phi) {
+ // {start} has already been invalidated, or the string builder doesn't
+ // contain a loop Phi.
+ *string_builder = kInvalidStringBuilder;
+ UpdateStatus(start, State::kInvalid);
+ continue;
+ }
+ DCHECK_EQ(start_status.state, State::kBeginStringBuilder);
+ DCHECK_EQ(start_status.id, string_builder_id);
+ one_string_builder_or_more_valid = true;
+
+ OneOrTwoByteAnalysis::State one_or_two_byte =
+ one_or_two_byte_analysis.OneOrTwoByte(start);
+
+ to_visit.clear();
+ ends.clear();
+
+ to_visit.push_back(start);
+ while (!to_visit.empty()) {
+ Node* curr = to_visit.back();
+ to_visit.pop_back();
+
+ Status curr_status = GetStatus(curr);
+ if (curr_status.state == State::kConfirmedInStringBuilder) continue;
+
+ DCHECK(curr_status.state == State::kInStringBuilder ||
+ curr_status.state == State::kBeginStringBuilder);
+ DCHECK_IMPLIES(curr_status.state == State::kBeginStringBuilder,
+ curr == start);
+ DCHECK_EQ(curr_status.id, start_status.id);
+ if (curr_status.state != State::kBeginStringBuilder) {
+ UpdateStatus(curr, State::kConfirmedInStringBuilder);
+ }
+
+ if (IsConcat(curr)) {
+ one_or_two_byte = OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte(
+ one_or_two_byte, one_or_two_byte_analysis.OneOrTwoByte(curr));
+ // Duplicating string inputs if needed, and marking them as
+ // InStringBuilder (so that EffectControlLinearizer doesn't lower them).
+ ReplaceConcatInputIfNeeded(curr, 1);
+ ReplaceConcatInputIfNeeded(curr, 2);
+ }
+
+ // Check if {curr} is one of the string builder's ends: if {curr} has no
+ // uses that are part of the string builder, then {curr} ends the string
+ // builder.
+ bool has_use_in_string_builder = false;
+ for (Node* next : curr->uses()) {
+ Status next_status = GetStatus(next);
+ if ((next_status.state == State::kInStringBuilder ||
+ next_status.state == State::kConfirmedInStringBuilder) &&
+ next_status.id == curr_status.id) {
+ if (next_status.state == State::kInStringBuilder) {
+ // We only add to {to_visit} when the state is kInStringBuilder to
+ // make sure that we don't revisit already-visited nodes.
+ to_visit.push_back(next);
+ }
+ if (!IsLoopPhi(curr) || !LoopContains(curr, next)) {
+ // The condition above is true when:
+ // - {curr} is not a loop phi: in that case, {next} is (one of) the
+ // nodes in the string builder after {curr}.
+ // - {curr} is a loop phi, and {next} is not inside the loop: in
+ // that case, {node} is (one of) the nodes in the string builder
+ // that are after {curr}. Note that we ignore uses of {curr}
+ // inside the loop, since if {curr} has no uses **after** the
+ // loop, then it's (one of) the end of the string builder.
+ has_use_in_string_builder = true;
+ }
+ }
+ }
+ if (!has_use_in_string_builder) {
+ ends.push_back(curr);
+ }
+ }
+
+ // Note that there is no need to check that the ends have no conflicting
+ // uses, because none of the ends can be alive at the same time, and thus,
+ // uses of the different ends can't be alive at the same time either. The
+ // reason that ens can't be alive at the same time is that if 2 ends were
+ // alive at the same time, then there exist a node n that is a predecessors
+ // of both ends, and that has 2 successors in the string builder (and alive
+ // at the same time), which is not possible because CheckNodeUses prevents
+ // it.
+
+ // Collecting next blocks where trimming is required (blocks following a
+ // loop Phi where the Phi is the last in a string builder), and setting
+ // kEndStringBuilder state to nodes where trimming should be done right
+ // after computing the node (when the last node in a string builder is not a
+ // loop phi).
+ for (Node* end : ends) {
+ if (IsLoopPhi(end)) {
+ BasicBlock* phi_block = schedule()->block(end);
+ for (BasicBlock* block : phi_block->successors()) {
+ if (phi_block->LoopContains(block)) continue;
+ if (!blocks_to_trimmings_map_[block->id().ToInt()].has_value()) {
+ blocks_to_trimmings_map_[block->id().ToInt()] =
+ ZoneVector<Node*>(temp_zone());
+ }
+ blocks_to_trimmings_map_[block->id().ToInt()]->push_back(end);
+ }
+ UpdateStatus(end, State::kEndStringBuilderLoopPhi);
+ } else {
+ UpdateStatus(end, State::kEndStringBuilder);
+ }
+ }
+
+ string_builder->one_or_two_bytes = one_or_two_byte;
+ }
+
+#ifdef DEBUG
+ if (one_string_builder_or_more_valid) {
+ broker()->isolate()->set_has_turbofan_string_builders();
+ }
+#else
+ USE(one_string_builder_or_more_valid);
+#endif
+}
+
+void StringBuilderOptimizer::VisitGraph() {
+ // Initial discovery of the potential string builders.
+ for (BasicBlock* block : *schedule()->rpo_order()) {
+ // Removing finished loops.
+ while (!loop_headers_.empty() &&
+ loop_headers_.back()->loop_end() == block) {
+ loop_headers_.pop_back();
+ }
+ // Adding new loop if necessary.
+ if (block->IsLoopHeader()) {
+ loop_headers_.push_back(block);
+ }
+ // Visiting block content.
+ for (Node* node : *block->nodes()) {
+ VisitNode(node, block);
+ }
+ }
+
+ // Finalize valid string builders (moving valid nodes to status
+ // kConfirmedInStringBuilder or kEndStringBuilder), and collecting the
+ // trimming points.
+ FinalizeStringBuilders();
+}
+
+void StringBuilderOptimizer::Run() { VisitGraph(); }
+
+StringBuilderOptimizer::StringBuilderOptimizer(JSGraph* jsgraph,
+ Schedule* schedule,
+ Zone* temp_zone,
+ JSHeapBroker* broker)
+ : jsgraph_(jsgraph),
+ schedule_(schedule),
+ temp_zone_(temp_zone),
+ broker_(broker),
+ blocks_to_trimmings_map_(schedule->BasicBlockCount(), temp_zone),
+ status_(jsgraph->graph()->NodeCount(),
+ Status{kInvalidId, State::kUnvisited}, temp_zone),
+ string_builders_(temp_zone),
+ loop_headers_(temp_zone) {}
+
+} // namespace compiler
+} // namespace internal
+} // namespace v8