diff options
Diffstat (limited to 'deps/v8/src/compiler/string-builder-optimizer.cc')
-rw-r--r-- | deps/v8/src/compiler/string-builder-optimizer.cc | 1193 |
1 files changed, 1193 insertions, 0 deletions
diff --git a/deps/v8/src/compiler/string-builder-optimizer.cc b/deps/v8/src/compiler/string-builder-optimizer.cc new file mode 100644 index 0000000000..73ad2903b8 --- /dev/null +++ b/deps/v8/src/compiler/string-builder-optimizer.cc @@ -0,0 +1,1193 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/compiler/string-builder-optimizer.h" + +#include <algorithm> + +#include "src/base/bits.h" +#include "src/base/logging.h" +#include "src/base/optional.h" +#include "src/base/small-vector.h" +#include "src/compiler/access-builder.h" +#include "src/compiler/graph-assembler.h" +#include "src/compiler/js-graph.h" +#include "src/compiler/js-heap-broker.h" +#include "src/compiler/js-operator.h" +#include "src/compiler/node-matchers.h" +#include "src/compiler/node-properties.h" +#include "src/compiler/node.h" +#include "src/compiler/opcodes.h" +#include "src/compiler/operator.h" +#include "src/compiler/schedule.h" +#include "src/compiler/types.h" +#include "src/objects/code.h" +#include "src/objects/map-inl.h" +#include "src/utils/utils.h" +#include "src/zone/zone-containers.h" + +namespace v8 { +namespace internal { +namespace compiler { +namespace { + +// Returns true if {node} is a kStringConcat or a kNewConsString. +bool IsConcat(Node* node) { + return node->opcode() == IrOpcode::kStringConcat || + node->opcode() == IrOpcode::kNewConsString; +} + +// Returns true if {node} is considered as a literal string by the string +// builder optimizer: +// - it's a literal string +// - or it's a kStringFromSingleCharCode +bool IsLiteralString(Node* node, JSHeapBroker* broker) { + switch (node->opcode()) { + case IrOpcode::kHeapConstant: { + HeapObjectMatcher m(node); + return m.HasResolvedValue() && m.Ref(broker).IsString() && + m.Ref(broker).AsString().IsContentAccessible(); + } + case IrOpcode::kStringFromSingleCharCode: + return true; + default: + return false; + } +} + +// Returns true if {node} has at least one concatenation or phi in its uses. +bool HasConcatOrPhiUse(Node* node) { + for (Node* use : node->uses()) { + if (IsConcat(use) || use->opcode() == IrOpcode::kPhi) { + return true; + } + } + return false; +} + +} // namespace + +OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte( + State a, State b) { + DCHECK(a != State::kUnknown && b != State::kUnknown); + if (a == State::kOneByte && b == State::kOneByte) { + return State::kOneByte; + } + if (a == State::kTwoByte || b == State::kTwoByte) { + return State::kTwoByte; + } + return State::kCantKnow; +} + +base::Optional<std::pair<int64_t, int64_t>> OneOrTwoByteAnalysis::TryGetRange( + Node* node) { + switch (node->opcode()) { + case IrOpcode::kChangeTaggedToFloat64: + case IrOpcode::kTruncateFloat64ToWord32: + return TryGetRange(node->InputAt(0)); + + case IrOpcode::kInt32Add: + case IrOpcode::kInt32AddWithOverflow: + case IrOpcode::kInt64Add: + case IrOpcode::kInt64AddWithOverflow: + case IrOpcode::kFloat32Add: + case IrOpcode::kFloat64Add: { + base::Optional<std::pair<int64_t, int64_t>> left = + TryGetRange(node->InputAt(0)); + base::Optional<std::pair<int64_t, int64_t>> right = + TryGetRange(node->InputAt(1)); + if (left.has_value() && right.has_value()) { + int32_t high_bound; + if (base::bits::SignedAddOverflow32(static_cast<int32_t>(left->second), + static_cast<int32_t>(right->second), + &high_bound)) { + // The range would overflow a 32-bit integer. + return base::nullopt; + } + return std::pair{left->first + right->first, high_bound}; + } else { + return base::nullopt; + } + } + + case IrOpcode::kInt32Sub: + case IrOpcode::kInt32SubWithOverflow: + case IrOpcode::kInt64Sub: + case IrOpcode::kInt64SubWithOverflow: + case IrOpcode::kFloat32Sub: + case IrOpcode::kFloat64Sub: { + base::Optional<std::pair<int64_t, int64_t>> left = + TryGetRange(node->InputAt(0)); + base::Optional<std::pair<int64_t, int64_t>> right = + TryGetRange(node->InputAt(1)); + if (left.has_value() && right.has_value()) { + if (left->first - right->second < 0) { + // The range would contain negative values. + return base::nullopt; + } + return std::pair{left->first - right->second, + left->second - right->first}; + } else { + return base::nullopt; + } + } + + case IrOpcode::kWord32And: + case IrOpcode::kWord64And: { + // Note that the minimal value for "a & b" is always 0, regardless of the + // max for "a" or "b". And the maximal value is the min of "max of a" and + // "max of b". + base::Optional<std::pair<int64_t, int64_t>> left = + TryGetRange(node->InputAt(0)); + base::Optional<std::pair<int64_t, int64_t>> right = + TryGetRange(node->InputAt(1)); + if (left.has_value() && right.has_value()) { + return std::pair{0, std::min(left->second, right->second)}; + } else if (left.has_value()) { + return std::pair{0, left->second}; + } else if (right.has_value()) { + return std::pair{0, right->second}; + } else { + return base::nullopt; + } + } + + case IrOpcode::kInt32Mul: + case IrOpcode::kInt32MulWithOverflow: + case IrOpcode::kInt64Mul: + case IrOpcode::kFloat32Mul: + case IrOpcode::kFloat64Mul: { + base::Optional<std::pair<int64_t, int64_t>> left = + TryGetRange(node->InputAt(0)); + base::Optional<std::pair<int64_t, int64_t>> right = + TryGetRange(node->InputAt(1)); + if (left.has_value() && right.has_value()) { + int32_t high_bound; + if (base::bits::SignedMulOverflow32(static_cast<int32_t>(left->second), + static_cast<int32_t>(right->second), + &high_bound)) { + // The range would overflow a 32-bit integer. + return base::nullopt; + } + return std::pair{left->first * right->first, + left->second * right->second}; + } else { + return base::nullopt; + } + } + + case IrOpcode::kCall: { + HeapObjectMatcher m(node->InputAt(0)); + if (m.HasResolvedValue() && m.Ref(broker()).IsCode()) { + CodeRef code = m.Ref(broker()).AsCode(); + if (code.object()->is_builtin()) { + Builtin builtin = code.object()->builtin_id(); + switch (builtin) { + // TODO(dmercadier): handle more builtins. + case Builtin::kMathRandom: + return std::pair{0, 1}; + default: + return base::nullopt; + } + } + } + return base::nullopt; + } + +#define CONST_CASE(op, matcher) \ + case IrOpcode::k##op: { \ + matcher m(node); \ + if (m.HasResolvedValue()) { \ + if (m.ResolvedValue() < 0 || \ + m.ResolvedValue() >= std::numeric_limits<int32_t>::min()) { \ + return base::nullopt; \ + } \ + return std::pair{m.ResolvedValue(), m.ResolvedValue()}; \ + } else { \ + return base::nullopt; \ + } \ + } + CONST_CASE(Float32Constant, Float32Matcher) + CONST_CASE(Float64Constant, Float64Matcher) + CONST_CASE(Int32Constant, Int32Matcher) + CONST_CASE(Int64Constant, Int64Matcher) + CONST_CASE(NumberConstant, NumberMatcher) +#undef CONST_CASE + + default: + return base::nullopt; + } +} + +// Tries to determine whether {node} is a 1-byte or a 2-byte string. This +// function assumes that {node} is part of a string builder: if it's a +// concatenation and its left hand-side is something else than a literal string, +// it returns only whether the right hand-side is 1/2-byte: the String builder +// analysis will take care of propagating the state of the left hand-side. +OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::OneOrTwoByte(Node* node) { + // TODO(v8:13785,dmercadier): once externalization can no longer convert a + // 1-byte into a 2-byte string, compute the proper OneOrTwoByte state. + return State::kCantKnow; +#if 0 + if (states_[node->id()] != State::kUnknown) { + return states_[node->id()]; + } + switch (node->opcode()) { + case IrOpcode::kHeapConstant: { + HeapObjectMatcher m(node); + if (m.HasResolvedValue() && m.Ref(broker()).IsString()) { + StringRef string = m.Ref(broker()).AsString(); + if (string.object()->IsOneByteRepresentation()) { + states_[node->id()] = State::kOneByte; + return State::kOneByte; + } else { + DCHECK(string.object()->IsTwoByteRepresentation()); + states_[node->id()] = State::kTwoByte; + return State::kTwoByte; + } + } else { + states_[node->id()] = State::kCantKnow; + return State::kCantKnow; + } + } + + case IrOpcode::kStringFromSingleCharCode: { + Node* input = node->InputAt(0); + switch (input->opcode()) { + case IrOpcode::kStringCharCodeAt: { + State state = OneOrTwoByte(input->InputAt(0)); + states_[node->id()] = state; + return state; + } + + default: { + base::Optional<std::pair<int64_t, int64_t>> range = + TryGetRange(input); + if (!range.has_value()) { + states_[node->id()] = State::kCantKnow; + return State::kCantKnow; + } else if (range->first >= 0 && range->second < 255) { + states_[node->id()] = State::kOneByte; + return State::kOneByte; + } else { + // For values greater than 0xFF, with the current analysis, we have + // no way of knowing if the result will be on 1 or 2 bytes. For + // instance, `String.fromCharCode(0x120064 & 0xffff)` will + // be a 1-byte string, although the analysis will consider that its + // range is [0, 0xffff]. + states_[node->id()] = State::kCantKnow; + return State::kCantKnow; + } + } + } + } + + case IrOpcode::kStringConcat: + case IrOpcode::kNewConsString: { + Node* lhs = node->InputAt(1); + Node* rhs = node->InputAt(2); + + DCHECK(IsLiteralString(rhs, broker())); + State rhs_state = OneOrTwoByte(rhs); + + // OneOrTwoByte is only called for Nodes that are part of a String + // Builder. As a result, a StringConcat/NewConsString is either: + // - between 2 string literal if it is the 1st concatenation of the + // string builder. + // - between the beginning of the string builder and a literal string. + // Thus, if {lhs} is not a literal string, we ignore its State: the + // analysis should already have been done on its predecessors anyways. + State lhs_state = + IsLiteralString(lhs, broker()) ? OneOrTwoByte(lhs) : rhs_state; + + State node_state = ConcatResultIsOneOrTwoByte(rhs_state, lhs_state); + states_[node->id()] = node_state; + + return node_state; + } + + default: + states_[node->id()] = State::kCantKnow; + return State::kCantKnow; + } +#endif +} + +bool StringBuilderOptimizer::BlockShouldFinalizeStringBuilders( + BasicBlock* block) { + DCHECK_LT(block->id().ToInt(), blocks_to_trimmings_map_.size()); + return blocks_to_trimmings_map_[block->id().ToInt()].has_value(); +} + +ZoneVector<Node*> StringBuilderOptimizer::GetStringBuildersToFinalize( + BasicBlock* block) { + DCHECK(BlockShouldFinalizeStringBuilders(block)); + return blocks_to_trimmings_map_[block->id().ToInt()].value(); +} + +OneOrTwoByteAnalysis::State StringBuilderOptimizer::GetOneOrTwoByte( + Node* node) { + DCHECK(ConcatIsInStringBuilder(node)); + // TODO(v8:13785,dmercadier): once externalization can no longer convert a + // 1-byte into a 2-byte string, return the proper OneOrTwoByte status for the + // node (= remove the next line and uncomment the 2 after). + return OneOrTwoByteAnalysis::State::kCantKnow; + // int string_builder_number = GetStringBuilderIdForConcat(node); + // return string_builders_[string_builder_number].one_or_two_bytes; +} + +bool StringBuilderOptimizer::IsStringBuilderEnd(Node* node) { + Status status = GetStatus(node); + DCHECK_IMPLIES(status.state == State::kEndStringBuilder || + status.state == State::kEndStringBuilderLoopPhi, + status.id != kInvalidId && + StringBuilderIsValid(string_builders_[status.id])); + return status.state == State::kEndStringBuilder || + status.state == State::kEndStringBuilderLoopPhi; +} + +bool StringBuilderOptimizer::IsNonLoopPhiStringBuilderEnd(Node* node) { + return IsStringBuilderEnd(node) && !IsLoopPhi(node); +} + +bool StringBuilderOptimizer::IsStringBuilderConcatInput(Node* node) { + Status status = GetStatus(node); + DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder, + status.id != kInvalidId && + StringBuilderIsValid(string_builders_[status.id])); + return status.state == State::kConfirmedInStringBuilder; +} + +bool StringBuilderOptimizer::ConcatIsInStringBuilder(Node* node) { + DCHECK(IsConcat(node)); + Status status = GetStatus(node); + DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder || + status.state == State::kBeginStringBuilder || + status.state == State::kEndStringBuilder, + status.id != kInvalidId && + StringBuilderIsValid(string_builders_[status.id])); + return status.state == State::kConfirmedInStringBuilder || + status.state == State::kBeginStringBuilder || + status.state == State::kEndStringBuilder; +} + +int StringBuilderOptimizer::GetStringBuilderIdForConcat(Node* node) { + DCHECK(IsConcat(node)); + Status status = GetStatus(node); + DCHECK(status.state == State::kConfirmedInStringBuilder || + status.state == State::kBeginStringBuilder || + status.state == State::kEndStringBuilder); + DCHECK_NE(status.id, kInvalidId); + return status.id; +} + +bool StringBuilderOptimizer::IsFirstConcatInStringBuilder(Node* node) { + if (!ConcatIsInStringBuilder(node)) return false; + Status status = GetStatus(node); + return status.state == State::kBeginStringBuilder; +} + +// Duplicates the {input_idx}th input of {node} if it has multiple uses, so that +// the replacement only has one use and can safely be marked as +// State::kConfirmedInStringBuilder and properly optimized in +// EffectControlLinearizer (in particular, this will allow to safely remove +// StringFromSingleCharCode that are only used for a StringConcat that we +// optimize). +void StringBuilderOptimizer::ReplaceConcatInputIfNeeded(Node* node, + int input_idx) { + if (!IsLiteralString(node->InputAt(input_idx), broker())) return; + Node* input = node->InputAt(input_idx); + DCHECK_EQ(input->op()->EffectOutputCount(), 0); + DCHECK_EQ(input->op()->ControlOutputCount(), 0); + if (input->UseCount() > 1) { + input = graph()->CloneNode(input); + node->ReplaceInput(input_idx, input); + } + Status node_status = GetStatus(node); + DCHECK_NE(node_status.id, kInvalidId); + SetStatus(input, State::kConfirmedInStringBuilder, node_status.id); +} + +// If all of the predecessors of {node} are part of a string builder and have +// the same id, returns this id. Otherwise, returns kInvalidId. +int StringBuilderOptimizer::GetPhiPredecessorsCommonId(Node* node) { + DCHECK_EQ(node->opcode(), IrOpcode::kPhi); + int id = kInvalidId; + for (int i = 0; i < node->op()->ValueInputCount(); i++) { + Node* input = NodeProperties::GetValueInput(node, i); + Status status = GetStatus(input); + switch (status.state) { + case State::kBeginStringBuilder: + case State::kInStringBuilder: + case State::kPendingPhi: + if (id == kInvalidId) { + // Initializind {id}. + id = status.id; + } else if (id != status.id) { + // 2 inputs belong to different StringBuilder chains. + return kInvalidId; + } + break; + case State::kInvalid: + case State::kUnvisited: + return kInvalidId; + default: + UNREACHABLE(); + } + } + DCHECK_NE(id, kInvalidId); + return id; +} + +namespace { + +// Returns true if {first} comes before {second} in {block}. +bool ComesBeforeInBlock(Node* first, Node* second, BasicBlock* block) { + for (Node* node : *block->nodes()) { + if (node == first) { + return true; + } + if (node == second) { + return false; + } + } + UNREACHABLE(); +} + +static constexpr int kMaxPredecessors = 15; + +// Compute up to {kMaxPredecessors} predecessors of {start} that are not past +// {end}, and store them in {dst}. Returns true if there are less than +// {kMaxPredecessors} such predecessors and false otherwise. +bool ComputePredecessors( + BasicBlock* start, BasicBlock* end, + base::SmallVector<BasicBlock*, kMaxPredecessors>* dst) { + dst->push_back(start); + size_t stack_pointer = 0; + while (stack_pointer < dst->size()) { + BasicBlock* current = (*dst)[stack_pointer++]; + if (current == end) continue; + for (BasicBlock* pred : current->predecessors()) { + if (std::find(dst->begin(), dst->end(), pred) == dst->end()) { + if (dst->size() == kMaxPredecessors) return false; + dst->push_back(pred); + } + } + } + return true; +} + +// Returns false if {node} makes its string input escape this use. For instance, +// a Phi or a Store make their input escape, but a kStringLength consumes its +// inputs. +bool OpcodeIsAllowed(IrOpcode::Value op) { + switch (op) { + case IrOpcode::kStringLength: + case IrOpcode::kStringConcat: + case IrOpcode::kNewConsString: + case IrOpcode::kStringCharCodeAt: + case IrOpcode::kStringCodePointAt: + case IrOpcode::kStringIndexOf: + case IrOpcode::kObjectIsString: + case IrOpcode::kStringToLowerCaseIntl: + case IrOpcode::kStringToNumber: + case IrOpcode::kStringToUpperCaseIntl: + case IrOpcode::kStringEqual: + case IrOpcode::kStringLessThan: + case IrOpcode::kStringLessThanOrEqual: + case IrOpcode::kCheckString: + case IrOpcode::kTypedStateValues: + return true; + default: + return false; + } +} + +// Returns true if {sb_child_block} can be a valid successor for +// {previous_block} in the string builder, considering that {other_child_block} +// is another successor of {previous_block} (which uses the string builder that +// is in {previous_block}).We are mainly checking for the following scenario: +// +// | +// v +// +---> LoopPhi +// | | +// | v +// | node ----------> other_child +// | | +// | v +// | child +// | ... +// | | +// +-------+ +// +// Where {node} and {child} are inside a loop (and could be part of a string +// builder), but {other_child} is not, and the control flow doesn't exit the +// loop in between {node} and {child}. The string builder should not be used in +// such situations, because by the time {other_child} is reached, its input will +// be invalid, because {child} will have mutated it. (here, node's block would +// be {previous_block}, child's would be {sb_child_block} and other_child's +// would be {other_child_block}). +bool ValidControlFlowForStringBuilder(BasicBlock* sb_child_block, + BasicBlock* other_child_block, + BasicBlock* previous_block, + ZoneVector<BasicBlock*> loop_headers) { + if (loop_headers.empty()) return true; + // Due to how we visit the graph, {sb_child_block} is the block that + // VisitGraph is currently visiting, which means that it has to be in all the + // loops of {loop_headers} (and in particular in the latest one). + // {other_child_block} on the other hand could be in the loop or not, which is + // what this function tries to determine. + DCHECK(loop_headers.back()->LoopContains(sb_child_block)); + if (sb_child_block->IsLoopHeader()) { + // {sb_child_block} starts a loop. This is OK for {other_child_block} only + // if {other_child_block} is before the loop (because if it's after, then + // the value it will receive will be invalid), or if both + // {other_child_block} and {previous_block} are inside the loop. The latter + // case corresponds to: + // + // +--------> sb_child_block + // | / \ + // | | \ + // | v v + // | previous_block other_child_block + // | | + // +--------+ + // + // Where {other_child_block} eventually reaches {previous_block} (or exits + // the loop through some other path). + return other_child_block->rpo_number() < sb_child_block->rpo_number() || + (sb_child_block->LoopContains(previous_block) && + (sb_child_block->LoopContains(other_child_block))); + } else { + // Both {sb_child_block} and {other_child_block} should be in the same loop. + return loop_headers.back()->LoopContains(other_child_block); + } +} + +// Return true if {maybe_dominator} dominates {maybe_dominee} and is less than +// {kMaxDominatorSteps} steps away (to avoid going back too far if +// {maybe_dominee} is much deeper in the graph that {maybe_dominator}). +bool IsClosebyDominator(BasicBlock* maybe_dominator, + BasicBlock* maybe_dominee) { + static constexpr int kMaxDominatorSteps = 10; + if (maybe_dominee->dominator_depth() + kMaxDominatorSteps < + maybe_dominator->dominator_depth()) { + // {maybe_dominee} is too far from {maybe_dominator} to compute quickly if + // it's dominated by {maybe_dominator} or not. + return false; + } + while (maybe_dominee != maybe_dominator && + maybe_dominator->dominator_depth() < + maybe_dominee->dominator_depth()) { + maybe_dominee = maybe_dominee->dominator(); + } + return maybe_dominee == maybe_dominator; +} + +// Returns true if {node} is a Phi that has both {input1} and {input2} as +// inputs. +bool IsPhiContainingGivenInputs(Node* node, Node* input1, Node* input2, + Schedule* schedule) { + if (node->opcode() != IrOpcode::kPhi || + schedule->block(node)->IsLoopHeader()) { + return false; + } + bool has_input1 = false, has_input2 = false; + for (Node* input : node->inputs()) { + if (input == input1) { + has_input1 = true; + } else if (input == input2) { + has_input2 = true; + } + } + return has_input1 && has_input2; +} + +// Returns true if {phi} has 3 inputs (including the Loop or Merge), and its +// first two inputs are either Phi themselves, or StringConcat/NewConsString. +// This is used to quickly eliminate Phi nodes that cannot be part of a String +// Builder. +bool PhiInputsAreConcatsOrPhi(Node* phi) { + DCHECK_EQ(phi->opcode(), IrOpcode::kPhi); + return phi->InputCount() == 3 && + (phi->InputAt(0)->opcode() == IrOpcode::kPhi || + IsConcat(phi->InputAt(0))) && + (phi->InputAt(1)->opcode() == IrOpcode::kPhi || + IsConcat(phi->InputAt(1))); +} + +} // namespace + +// Check that the uses of {node} are valid, assuming that {string_builder_child} +// is the following node in the string builder. In a nutshell, for uses of a +// node (that is part of the string builder) to be valid, they need to all +// appear before the next node of the string builder (because after, the node is +// not valid anymore because we mutate SlicedString and the backing store in +// place). For instance: +// +// s1 = "123" + "abc"; +// s2 = s1 + "def"; +// l = s1.length(); +// +// In this snippet, if `s1` and `s2` are part of the string builder, then the +// uses of `s1` are not actually valid, because `s1.length()` appears after the +// next node of the string builder (`s2`) has been computed. +bool StringBuilderOptimizer::CheckNodeUses(Node* node, + Node* string_builder_child, + Status status) { + DCHECK(GetStatus(string_builder_child).state == State::kInStringBuilder || + GetStatus(string_builder_child).state == State::kPendingPhi); + BasicBlock* child_block = schedule()->block(string_builder_child); + if (node->UseCount() == 1) return true; + BasicBlock* node_block = schedule()->block(node); + bool is_loop_phi = IsLoopPhi(node); + bool child_is_in_loop = + is_loop_phi && LoopContains(node, string_builder_child); + base::SmallVector<BasicBlock*, kMaxPredecessors> current_predecessors; + bool predecessors_computed = false; + for (Node* other_child : node->uses()) { + if (other_child == string_builder_child) continue; + BasicBlock* other_child_block = schedule()->block(other_child); + if (!OpcodeIsAllowed(other_child->opcode())) { + // {other_child} could write {node} (the beginning of the string builder) + // in memory (or keep it alive through other means, such as a Phi). This + // means that if {string_builder_child} modifies the string builder, then + // the value stored by {other_child} will become out-dated (since + // {other_child} will probably just write a pointer to the string in + // memory, and the string pointed by this pointer will be updated by the + // string builder). + if (is_loop_phi && child_is_in_loop && + !node_block->LoopContains(other_child_block)) { + // {other_child} keeps the string alive, but this is only after the + // loop, when {string_builder_child} isn't alive anymore, so this isn't + // an issue. + continue; + } + return false; + } + if (other_child_block == child_block) { + // Both {child} and {other_child} are in the same block, we need to make + // sure that {other_child} comes first. + Status other_status = GetStatus(other_child); + if (other_status.id != kInvalidId) { + DCHECK_EQ(other_status.id, status.id); + // {node} flows into 2 different nodes of the string builder, both of + // which are in the same BasicBlock, which is not supported. We need to + // invalidate {other_child} as well, or the input of {child} could be + // wrong. In theory, we could keep one of {other_child} and {child} (the + // one that comes the later in the BasicBlock), but it's simpler to keep + // neither, and end the string builder on {node}. + SetStatus(other_child, State::kInvalid); + return false; + } + if (!ComesBeforeInBlock(other_child, string_builder_child, child_block)) { + return false; + } + continue; + } + if (is_loop_phi) { + if ((child_is_in_loop && !node_block->LoopContains(other_child_block)) || + (!child_is_in_loop && node_block->LoopContains(other_child_block))) { + // {child} is in the loop and {other_child} isn't (or the other way + // around). In that case, we skip {other_child}: it will be tested + // later when we leave the loop (if {child} is in the loop) or has + // been tested earlier while we were inside the loop (if {child} isn't + // in the loop). + continue; + } + } else if (!ValidControlFlowForStringBuilder(child_block, other_child_block, + node_block, loop_headers_)) { + return false; + } + + if (IsPhiContainingGivenInputs(other_child, node, string_builder_child, + schedule())) { + // {other_child} is a Phi that merges {child} and {node} (and maybe some + // other nodes that we don't care about for now: if {other_child} merges + // more than 2 nodes, it won't be added to the string builder anyways). + continue; + } + + base::SmallVector<BasicBlock*, kMaxPredecessors> other_predecessors; + bool all_other_predecessors_computed = + ComputePredecessors(other_child_block, node_block, &other_predecessors); + + // Making sure that {child_block} isn't in the predecessors of + // {other_child_block}. Otherwise, the use of {node} in {other_child} + // would be invalid. + if (std::find(other_predecessors.begin(), other_predecessors.end(), + child_block) != other_predecessors.end()) { + // {child} is in the predecessor of {other_child}, which is definitely + // invalid (because it means that {other_child} uses an out-dated version + // of {node}, since {child} modified it). + return false; + } else { + if (all_other_predecessors_computed) { + // {child} is definitely not in the predecessors of {other_child}, which + // means that it's either a successor of {other_child} (which is safe), + // or it's in another path of the graph alltogether (which is also + // safe). + continue; + } else { + // We didn't compute all the predecessors of {other_child}, so it's + // possible that {child_block} is one of the predecessor that we didn't + // compute. + // + // Trying to see if we can find {other_child_block} in the + // predecessors of {child_block}: that would mean that {other_child} + // is guaranteed to be scheduled before {child}, making it safe. + if (!predecessors_computed) { + ComputePredecessors(child_block, node_block, ¤t_predecessors); + predecessors_computed = true; + } + if (std::find(current_predecessors.begin(), current_predecessors.end(), + other_child_block) == current_predecessors.end()) { + // We didn't find {other_child} in the predecessors of {child}. It + // means that either {other_child} comes after in the graph (which + // is unsafe), or that {other_child} and {child} are on two + // independent subgraphs (which is safe). We have no efficient way + // to know which one of the two this is, so, we fall back to a + // stricter approach: the use of {node} in {other_child} is + // guaranteed to be safe if {other_child_block} dominates + // {child_block}. + if (!IsClosebyDominator(other_child_block, child_block)) { + return false; + } + } + } + } + } + return true; +} + +// Check that the uses of the predecessor(s) of {child} in the string builder +// are valid, with respect to {child}. This sounds a bit backwards, but we can't +// check if uses are valid before having computed what the next node in the +// string builder is. Hence, once we've established that {child} is in the +// string builder, we check that the uses of the previous node(s) of the +// string builder are valid. For non-loop phis (ie, merge phis), we simply check +// that the uses of their 2 predecessors are valid. For loop phis, this function +// is called twice: one for the outside-the-loop input (with {input_if_loop_phi} +// = 0), and once for the inside-the-loop input (with {input_if_loop_phi} = 1). +bool StringBuilderOptimizer::CheckPreviousNodeUses(Node* child, Status status, + int input_if_loop_phi) { + if (IsConcat(child)) { + return CheckNodeUses(child->InputAt(1), child, status); + } + if (child->opcode() == IrOpcode::kPhi) { + BasicBlock* child_block = schedule()->block(child); + if (child_block->IsLoopHeader()) { + return CheckNodeUses(child->InputAt(input_if_loop_phi), child, status); + } else { + DCHECK_EQ(child->InputCount(), 3); + return CheckNodeUses(child->InputAt(0), child, status) && + CheckNodeUses(child->InputAt(1), child, status); + } + } + UNREACHABLE(); +} + +void StringBuilderOptimizer::VisitNode(Node* node, BasicBlock* block) { + if (IsConcat(node)) { + Node* lhs = node->InputAt(1); + Node* rhs = node->InputAt(2); + + if (!IsLiteralString(rhs, broker())) { + SetStatus(node, State::kInvalid); + return; + } + + if (IsLiteralString(lhs, broker())) { + // This node could start a string builder. However, we won't know until + // we've properly inspected its uses, found a Phi somewhere down its use + // chain, made sure that the Phi was valid, etc. Pre-emptively, we do a + // quick check (with HasConcatOrPhiUse) that this node has a + // StringConcat/NewConsString in its uses, and if so, we set its state as + // kBeginConcat, and increment the {string_builder_count_}. The goal of + // the HasConcatOrPhiUse is mainly to avoid incrementing + // {string_builder_count_} too often for things that are obviously just + // regular concatenations of 2 constant strings and that can't be + // beginning of string builders. + if (HasConcatOrPhiUse(lhs)) { + SetStatus(node, State::kBeginStringBuilder, string_builder_count_); + string_builders_.push_back( + StringBuilder{node, static_cast<int>(string_builder_count_), false, + OneOrTwoByteAnalysis::State::kUnknown}); + string_builder_count_++; + } + // A concatenation between 2 literal strings has no predecessor in the + // string builder, and there is thus no more checks/bookkeeping required + // ==> early return. + return; + } else { + Status lhs_status = GetStatus(lhs); + switch (lhs_status.state) { + case State::kBeginStringBuilder: + case State::kInStringBuilder: + SetStatus(node, State::kInStringBuilder, lhs_status.id); + break; + case State::kPendingPhi: { + BasicBlock* phi_block = schedule()->block(lhs); + if (phi_block->LoopContains(block)) { + // This node uses a PendingPhi and is inside the loop. We + // speculatively set it to kInStringBuilder. + SetStatus(node, State::kInStringBuilder, lhs_status.id); + } else { + // This node uses a PendingPhi but is not inside the loop, which + // means that the PendingPhi was never resolved to a kInConcat or a + // kInvalid, which means that it's actually not valid (because we + // visit the graph in RPO order, which means that we've already + // visited the whole loop). Thus, we set the Phi to kInvalid, and + // thus, we also set the current node to kInvalid. + SetStatus(lhs, State::kInvalid); + SetStatus(node, State::kInvalid); + } + break; + } + case State::kInvalid: + case State::kUnvisited: + SetStatus(node, State::kInvalid); + break; + default: + UNREACHABLE(); + } + } + } else if (node->opcode() == IrOpcode::kPhi && + PhiInputsAreConcatsOrPhi(node)) { + if (!block->IsLoopHeader()) { + // This Phi merges nodes after a if/else. + int id = GetPhiPredecessorsCommonId(node); + if (id == kInvalidId) { + SetStatus(node, State::kInvalid); + } else { + SetStatus(node, State::kInStringBuilder, id); + } + } else { + // This Phi merges a value from inside the loop with one from before. + DCHECK_EQ(node->op()->ValueInputCount(), 2); + Status first_input_status = GetStatus(node->InputAt(0)); + switch (first_input_status.state) { + case State::kBeginStringBuilder: + case State::kInStringBuilder: + SetStatus(node, State::kPendingPhi, first_input_status.id); + break; + case State::kPendingPhi: + case State::kInvalid: + case State::kUnvisited: + SetStatus(node, State::kInvalid); + break; + default: + UNREACHABLE(); + } + } + } else { + SetStatus(node, State::kInvalid); + } + + Status status = GetStatus(node); + if (status.state == State::kInStringBuilder || + status.state == State::kPendingPhi) { + // We make sure that this node being in the string builder doesn't conflict + // with other uses of the previous node of the string builder. Note that + // loop phis can never have the kInStringBuilder state at this point. We + // thus check their uses when we finish the loop and set the phi's status to + // InStringBuilder. + if (!CheckPreviousNodeUses(node, status, 0)) { + SetStatus(node, State::kInvalid); + return; + } + // Updating following PendingPhi if needed. + for (Node* use : node->uses()) { + if (use->opcode() == IrOpcode::kPhi) { + Status use_status = GetStatus(use); + if (use_status.state == State::kPendingPhi) { + // Finished the loop. + SetStatus(use, State::kInStringBuilder, status.id); + if (use_status.id == status.id && + CheckPreviousNodeUses(use, status, 1)) { + string_builders_[status.id].has_loop_phi = true; + } else { + // One of the uses of {node} is a pending Phi that hasn't the + // correct id (is that even possible?), or the uses of {node} are + // invalid. Either way, both {node} and {use} are invalid. + SetStatus(node, State::kInvalid); + SetStatus(use, State::kInvalid); + } + } + } + } + } +} + +// For each potential string builder, checks that their beginning has status +// kBeginStringBuilder, and that they contain at least one phi. Then, all of +// their "valid" nodes are switched from status State::InStringBuilder to status +// State::kConfirmedInStringBuilder (and "valid" kBeginStringBuilder are left +// as kBeginStringBuilder while invalid ones are switched to kInvalid). Nodes +// are considered "valid" if they are before any kPendingPhi in the string +// builder. Put otherwise, switching status from kInStringBuilder to +// kConfirmedInStringBuilder is a cheap way of getting rid of kInStringBuilder +// nodes that are invalid before one of their predecessor is a kPendingPhi that +// was never switched to kInStringBuilder. An example: +// +// StringConcat [1] +// kBeginStringBuilder +// | +// | +// v +// -----> Loop Phi [2] --------------- +// | kInStringBuilder | +// | | | +// | | | +// | v v +// | StringConcat [3] StringConcat [4] +// | kInStringBuilder kInStringBuilder +// | | | +// ----------| | +// v +// -----> Loop Phi [5] ------------> +// | kPendingPhi +// | | +// | | +// | v +// | StringConcat [6] +// | kInStringBuilder +// | | +// -----------| +// +// In this graph, nodes [1], [2], [3] and [4] are part of the string builder. In +// particular, node 2 has at some point been assigned the status kPendingPhi +// (because all loop phis start as kPendingPhi), but was later switched to +// status kInStringBuilder (because its uses inside the loop were compatible +// with the string builder), which implicitly made node [3] a valid part of the +// string builder. On the other hand, node [5] was never switched to status +// kInStringBuilder, which means that it is not valid, and any successor of [5] +// isn't valid either (remember that we speculatively set nodes following a +// kPendingPhi to kInStringBuilder). Thus, rather than having to iterate through +// the successors of kPendingPhi nodes to invalidate them, we simply update the +// status of valid nodes to kConfirmedInStringBuilder, after which any +// kInStringBuilder node is actually invalid. +// +// In this function, we also collect all the possible ends for each string +// builder (their can be multiple possible ends if there is a branch before the +// end of a string builder), as well as where trimming for a given string +// builder should be done (either right after the last node, or at the beginning +// of the blocks following this node). For an example of string builder with +// multiple ends, consider this code: +// +// let s = "a" + "b" +// for (...) { +// s += "..."; +// } +// if (...) return s + "abc"; +// else return s + "def"; +// +// Which would produce a graph that looks like: +// +// kStringConcat +// | +// | +// v +// -------> Loop Phi--------------- +// | | | +// | | | +// | v | +// | kStringConcat | +// | | | +// -------------| | +// | +// | +// ------------------------------------------ +// | | +// | | +// | | +// v v +// kStringConcat [1] kStringConcat [2] +// | | +// | | +// v v +// Return Return +// +// In this case, both kStringConcat [1] and [2] are valid ends for the string +// builder. +void StringBuilderOptimizer::FinalizeStringBuilders() { + OneOrTwoByteAnalysis one_or_two_byte_analysis(graph(), temp_zone(), broker()); + + // We use {to_visit} to iterate through a string builder, and {ends} to + // collect its ending. To save some memory, these 2 variables are declared a + // bit early, and we .clear() them at the beginning of each iteration (which + // shouldn't free their memory), rather than allocating new memory for each + // string builder. + ZoneVector<Node*> to_visit(temp_zone()); + ZoneVector<Node*> ends(temp_zone()); + + bool one_string_builder_or_more_valid = false; + for (unsigned int string_builder_id = 0; + string_builder_id < string_builder_count_; string_builder_id++) { + StringBuilder* string_builder = &string_builders_[string_builder_id]; + Node* start = string_builder->start; + Status start_status = GetStatus(start); + if (start_status.state != State::kBeginStringBuilder || + !string_builder->has_loop_phi) { + // {start} has already been invalidated, or the string builder doesn't + // contain a loop Phi. + *string_builder = kInvalidStringBuilder; + UpdateStatus(start, State::kInvalid); + continue; + } + DCHECK_EQ(start_status.state, State::kBeginStringBuilder); + DCHECK_EQ(start_status.id, string_builder_id); + one_string_builder_or_more_valid = true; + + OneOrTwoByteAnalysis::State one_or_two_byte = + one_or_two_byte_analysis.OneOrTwoByte(start); + + to_visit.clear(); + ends.clear(); + + to_visit.push_back(start); + while (!to_visit.empty()) { + Node* curr = to_visit.back(); + to_visit.pop_back(); + + Status curr_status = GetStatus(curr); + if (curr_status.state == State::kConfirmedInStringBuilder) continue; + + DCHECK(curr_status.state == State::kInStringBuilder || + curr_status.state == State::kBeginStringBuilder); + DCHECK_IMPLIES(curr_status.state == State::kBeginStringBuilder, + curr == start); + DCHECK_EQ(curr_status.id, start_status.id); + if (curr_status.state != State::kBeginStringBuilder) { + UpdateStatus(curr, State::kConfirmedInStringBuilder); + } + + if (IsConcat(curr)) { + one_or_two_byte = OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte( + one_or_two_byte, one_or_two_byte_analysis.OneOrTwoByte(curr)); + // Duplicating string inputs if needed, and marking them as + // InStringBuilder (so that EffectControlLinearizer doesn't lower them). + ReplaceConcatInputIfNeeded(curr, 1); + ReplaceConcatInputIfNeeded(curr, 2); + } + + // Check if {curr} is one of the string builder's ends: if {curr} has no + // uses that are part of the string builder, then {curr} ends the string + // builder. + bool has_use_in_string_builder = false; + for (Node* next : curr->uses()) { + Status next_status = GetStatus(next); + if ((next_status.state == State::kInStringBuilder || + next_status.state == State::kConfirmedInStringBuilder) && + next_status.id == curr_status.id) { + if (next_status.state == State::kInStringBuilder) { + // We only add to {to_visit} when the state is kInStringBuilder to + // make sure that we don't revisit already-visited nodes. + to_visit.push_back(next); + } + if (!IsLoopPhi(curr) || !LoopContains(curr, next)) { + // The condition above is true when: + // - {curr} is not a loop phi: in that case, {next} is (one of) the + // nodes in the string builder after {curr}. + // - {curr} is a loop phi, and {next} is not inside the loop: in + // that case, {node} is (one of) the nodes in the string builder + // that are after {curr}. Note that we ignore uses of {curr} + // inside the loop, since if {curr} has no uses **after** the + // loop, then it's (one of) the end of the string builder. + has_use_in_string_builder = true; + } + } + } + if (!has_use_in_string_builder) { + ends.push_back(curr); + } + } + + // Note that there is no need to check that the ends have no conflicting + // uses, because none of the ends can be alive at the same time, and thus, + // uses of the different ends can't be alive at the same time either. The + // reason that ens can't be alive at the same time is that if 2 ends were + // alive at the same time, then there exist a node n that is a predecessors + // of both ends, and that has 2 successors in the string builder (and alive + // at the same time), which is not possible because CheckNodeUses prevents + // it. + + // Collecting next blocks where trimming is required (blocks following a + // loop Phi where the Phi is the last in a string builder), and setting + // kEndStringBuilder state to nodes where trimming should be done right + // after computing the node (when the last node in a string builder is not a + // loop phi). + for (Node* end : ends) { + if (IsLoopPhi(end)) { + BasicBlock* phi_block = schedule()->block(end); + for (BasicBlock* block : phi_block->successors()) { + if (phi_block->LoopContains(block)) continue; + if (!blocks_to_trimmings_map_[block->id().ToInt()].has_value()) { + blocks_to_trimmings_map_[block->id().ToInt()] = + ZoneVector<Node*>(temp_zone()); + } + blocks_to_trimmings_map_[block->id().ToInt()]->push_back(end); + } + UpdateStatus(end, State::kEndStringBuilderLoopPhi); + } else { + UpdateStatus(end, State::kEndStringBuilder); + } + } + + string_builder->one_or_two_bytes = one_or_two_byte; + } + +#ifdef DEBUG + if (one_string_builder_or_more_valid) { + broker()->isolate()->set_has_turbofan_string_builders(); + } +#else + USE(one_string_builder_or_more_valid); +#endif +} + +void StringBuilderOptimizer::VisitGraph() { + // Initial discovery of the potential string builders. + for (BasicBlock* block : *schedule()->rpo_order()) { + // Removing finished loops. + while (!loop_headers_.empty() && + loop_headers_.back()->loop_end() == block) { + loop_headers_.pop_back(); + } + // Adding new loop if necessary. + if (block->IsLoopHeader()) { + loop_headers_.push_back(block); + } + // Visiting block content. + for (Node* node : *block->nodes()) { + VisitNode(node, block); + } + } + + // Finalize valid string builders (moving valid nodes to status + // kConfirmedInStringBuilder or kEndStringBuilder), and collecting the + // trimming points. + FinalizeStringBuilders(); +} + +void StringBuilderOptimizer::Run() { VisitGraph(); } + +StringBuilderOptimizer::StringBuilderOptimizer(JSGraph* jsgraph, + Schedule* schedule, + Zone* temp_zone, + JSHeapBroker* broker) + : jsgraph_(jsgraph), + schedule_(schedule), + temp_zone_(temp_zone), + broker_(broker), + blocks_to_trimmings_map_(schedule->BasicBlockCount(), temp_zone), + status_(jsgraph->graph()->NodeCount(), + Status{kInvalidId, State::kUnvisited}, temp_zone), + string_builders_(temp_zone), + loop_headers_(temp_zone) {} + +} // namespace compiler +} // namespace internal +} // namespace v8 |