diff options
Diffstat (limited to 'deps/v8/src/compiler/string-builder-optimizer.h')
-rw-r--r-- | deps/v8/src/compiler/string-builder-optimizer.h | 378 |
1 files changed, 378 insertions, 0 deletions
diff --git a/deps/v8/src/compiler/string-builder-optimizer.h b/deps/v8/src/compiler/string-builder-optimizer.h new file mode 100644 index 0000000000..94f4ce951c --- /dev/null +++ b/deps/v8/src/compiler/string-builder-optimizer.h @@ -0,0 +1,378 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_ +#define V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_ + +#include <cstdint> +#include <unordered_map> +#include <vector> + +#include "src/base/macros.h" +#include "src/base/optional.h" +#include "src/compiler/graph-assembler.h" +#include "src/compiler/graph-reducer.h" +#include "src/compiler/js-graph.h" +#include "src/compiler/machine-operator.h" +#include "src/compiler/node-marker.h" +#include "src/compiler/node.h" +#include "src/compiler/schedule.h" +#include "src/zone/zone-containers.h" + +namespace v8 { +namespace internal { + +class Zone; + +namespace compiler { + +class JSGraphAssembler; +class NodeOriginTable; +class Schedule; +class SourcePositionTable; +class JSHeapBroker; + +// StringBuilderOptimizer aims at avoid ConsString for some loops that build +// strings, and instead update a mutable over-allocated backing store, while +// keeping a (mutable) SlicedString to the valid part of the backing store. +// +// StringBuilderOptimizer only does the analysis: it finds out which nodes could +// benefit from this optimization. Then, EffectControlLinearizer actually +// applies the optimization to the graph. +// +// A typical example of what the StringBuilderOptimizer can optimize is: +// +// let s = ""; +// for (...) { +// s += "..."; +// } +// +// In general, for a series of concatenations to be optimized, they need: +// - To start on a single initial concatenation +// - All the concatenations in the string builder must have constant strings +// or String.FromCharCode on their right-hand side. +// - At least one of the concatenation must be in a loop. +// +// Because everything is nicer with a picture, here is one of what kind of +// patterns the StringBuilderOptimizer tries to optimize: +// +// +--------+ +// |kLiteral| +// +--------+ +// | +// | +// v +// +-------------+ +--------+ +// |kStringConcat| <------- |kLiteral| +// +-------------+ +--------+ +// | +// | +// v +// optionally, +// more kStringConcat +// | +// | +// v +// +----+ +// -------->|kPhi|------------------------------------------ +// | +----+ | +// | | | +// | | | +// | | | +// | | | +// | | | +// | | | +// | | | +// | v | +// | +-------------+ +--------+ | +// | |kStringConcat| <------- |kLiteral| | +// | +-------------+ +--------+ | +// | | | +// | | | +// | v | +// | optionally, v +// | more kStringConcat optionally, +// | | more kStringConcat +// | | or more kPhi/loops +// | | | +// ------------| | +// | +// | +// v +// +// Where "kLiteral" actually means "either a string literal (HeapConstant) or a +// StringFromSingleCharCode". And kStringConcat can also be kNewConsString (when +// the size of the concatenation is known to be more than 13 bytes, Turbofan's +// front-end generates kNewConsString opcodes rather than kStringConcat). +// The StringBuilder also supports merge phis. For instance: +// +// +--------+ +// |kLiteral| +// +--------+ +// | +// | +// v +// +-------------+ +--------+ +// |kStringConcat| <------- |kLiteral| +// +-------------+ +--------+ +// | | +// | | +// | | +// +---------------+ +---------------+ +// | | +// | | +// v v +// +-------------+ +-------------+ +// |kStringConcat| |kStringConcat| +// +-------------+ +-------------+ +// | | +// | | +// | | +// +---------------+ +---------------+ +// | | +// | | +// v v +// +-------------+ +// | kPhi | +// | (merge) | +// +-------------+ +// | +// | +// v +// +// (and, of course, loops and merge can be mixed). + +class OneOrTwoByteAnalysis final { + // The class OneOrTwoByteAnalysis is used to try to statically determine + // whether a string constant or StringFromSingleCharCode is a 1-byte or a + // 2-byte string. + // If we succeed to do this analysis for all of the nodes in a string builder, + // then we know statically whether this string builder is building a 1-byte or + // a 2-byte string, and we can optimize the generated code to remove all + // 1-byte/2-byte checks. + public: + OneOrTwoByteAnalysis(Graph* graph, Zone* zone, JSHeapBroker* broker) + : states_(graph->NodeCount(), State::kUnknown, zone), broker_(broker) {} + + enum class State : uint8_t { + kUnknown, // Not yet determined if the string is 1 or 2-bytes + kOneByte, // Only 1-byte strings in the string builder + kTwoByte, // At least one 2-byte string in the string builder + kCantKnow // Cannot determine statically if the string will be 1 or 2-bytes + + // Lattice of possible transitions: + // + // kUnknown + // / | \ + // / | \ + // v | \ + // kOneByte | | + // | | | | + // | | | | + // | v v | + // | kTwoByte | + // | | / + // \ | / + // v v v + // kCantKnow + // + // Which means that for instance it's possible to realize that a kUnknown + // string builder will produce a 1-byte string, and we can later realize + // that it will instead be a 2-byte string. Or, we could be in kOneByte + // state, and then realize that the string may or may not end up being + // 2-byte, so we'll move to kCantKnow state. + }; + + // Computes and returns a State reflecting whether {node} is a 1-byte or + // 2-byte string. + State OneOrTwoByte(Node* node); + + // Computes whether the string builder will be on 1-byte or 2-byte if it + // contains two nodes that have states {a} and {b}. For instance, if both {a} + // and {b} are kOneByte, ConcatResultIsOneOrTwoByte returns kOneByte. + static State ConcatResultIsOneOrTwoByte(State a, State b); + + private: + // Returns the positive integral range that {node} can take. If {node} can be + // negative or is not a number, returns nullopt. If the range exceeds 2**32, + // returns nullopt as well. The analysis of TryGetRange is not complete (some + // operators are ignored), so if {node} isn't handled, then nullopt is + // returned. If this function returns a range between 0 and 255, then we + // assume that calling StringFromSingleCharCode on {node} will produce a + // 1-byte string. The analysis is sound (it doesn't make mistake), but is not + // complete (it bails out (returns nullopt) on operators that are not + // handled). + base::Optional<std::pair<int64_t, int64_t>> TryGetRange(Node* node); + + JSHeapBroker* broker() { return broker_; } + + ZoneVector<State> states_; + JSHeapBroker* broker_; +}; + +class V8_EXPORT_PRIVATE StringBuilderOptimizer final { + public: + StringBuilderOptimizer(JSGraph* jsgraph, Schedule* schedule, Zone* temp_zone, + JSHeapBroker* broker); + + // Returns true if some trimming code should be inserted at the beginning of + // {block} to finalize some string builders. + bool BlockShouldFinalizeStringBuilders(BasicBlock* block); + // Returns which nodes should be trimmed at the beginning of {block} to + // finalize some string builders. + ZoneVector<Node*> GetStringBuildersToFinalize(BasicBlock* block); + + // Returns true if {node} is the last node of a StringBuilder (which means + // that trimming code should be inserted after {node}). + // Note that string builders cannot end in the middle of a loop (unless it was + // started in the same loop). The way it's enforced is that when we first + // visit a loop Phi that could be part of a String Builder, we set its status + // to State::kPendingPhi. Only once we've visited the whole loop and the + // backedge and that the use chain following the loop phi up to and including + // the backedge are valid as part of a String Builder, then the loop phi + // status is siwtched to State::kInStringBuilder. Then, in the final step + // where we switch the status to State::kConfirmedInStringBuilder, we ignore + // nodes that have a status that isn't kInStringBuilder, which means that we + // ignore loop phis that still have the kPendingPhi status (and their + // successors). The String Builders thus cannot end inside loops. + bool IsStringBuilderEnd(Node* node); + // Returns true if {node} is a the last node of a StringBuilder and is not a + // loop phi. The "loop phi" distinction matters, because trimming for loop + // phis is trickier (because we don't want to trim at every iteration of the + // loop, but only once after the loop). + bool IsNonLoopPhiStringBuilderEnd(Node* node); + // Returns true if {node} is the input of a concatenation that is part of a + // StringBuilder. + bool IsStringBuilderConcatInput(Node* node); + // Returns true if {node} is part of a StringBuilder. + bool ConcatIsInStringBuilder(Node* node); + // Returns true if {node} is the 1st node of a StringBuilder (which means that + // when lowering {node}, we should allocate and initialize everything for this + // particular StringBuilder). + bool IsFirstConcatInStringBuilder(Node* node); + + // Returns a OneOrTwoByteAnalysis::State representing whether the + // StringBuilder that contains {node} is building a 1-byte or a 2-byte. + OneOrTwoByteAnalysis::State GetOneOrTwoByte(Node* node); + + void Run(); + + JSGraph* jsgraph() const { return jsgraph_; } + Graph* graph() const { return jsgraph_->graph(); } + Schedule* schedule() const { return schedule_; } + Zone* temp_zone() const { return temp_zone_; } + JSHeapBroker* broker() const { return broker_; } + + private: + enum class State : uint8_t { + kUnvisited = 0, + kBeginStringBuilder, // A (potential) beginning of a StringBuilder + kInStringBuilder, // A node that could be in a StringBuilder + kPendingPhi, // A phi that could be in a StringBuilder + kConfirmedInStringBuilder, // A node that is definitely in a StringBuilder + kEndStringBuilder, // A node that ends definitely a StringBuilder, and that + // can be trimmed right away + kEndStringBuilderLoopPhi, // A phi that ends a StringBuilder, and whose + // trimming need to be done at the beginning of + // the following blocks. + kInvalid, // A node that we visited and that we can't optimize. + kNumberOfState + }; + + struct Status { + int id; // The id of the StringBuilder that the node belongs to (or + // kInvalidId). + State state; // The state of the node. + }; + static constexpr int kInvalidId = -1; + + Status GetStatus(Node* node) const { + if (node->id() > status_.size()) { + return Status{kInvalidId, State::kInvalid}; + } else { + return status_[node->id()]; + } + } + void SetStatus(Node* node, State state, int id = kInvalidId) { + DCHECK_NE(state, State::kUnvisited); + DCHECK_IMPLIES(id != kInvalidId, state != State::kInvalid); + if (node->id() >= status_.size()) { + // We should really not allocate too many new nodes: the only new nodes we + // allocate are constant inputs of nodes in the string builder that have + // multiple uses. Thus, we use a slow exponential growth for {status_}. + constexpr double growth_factor = 1.1; + status_.resize(node->id() * growth_factor, + Status{kInvalidId, State::kUnvisited}); + } + status_[node->id()] = Status{id, state}; + } + void UpdateStatus(Node* node, State state) { + int id = state == State::kInvalid ? kInvalidId : GetStatus(node).id; + status_[node->id()] = Status{id, state}; + } + + struct StringBuilder { + Node* start; + int id; + bool has_loop_phi; + OneOrTwoByteAnalysis::State one_or_two_bytes; + }; + const StringBuilder kInvalidStringBuilder = { + nullptr, kInvalidId, false, OneOrTwoByteAnalysis::State::kUnknown}; + +#ifdef DEBUG + bool StringBuilderIsValid(StringBuilder string_builder) { + return string_builder.start != nullptr && string_builder.id != kInvalidId && + string_builder.has_loop_phi; + } +#endif + + bool IsLoopPhi(Node* node) const { + return node->opcode() == IrOpcode::kPhi && + schedule()->block(node)->IsLoopHeader(); + } + bool LoopContains(Node* loop_phi, Node* node) { + DCHECK(IsLoopPhi(loop_phi)); + return schedule()->block(loop_phi)->LoopContains(schedule()->block(node)); + } + + int GetStringBuilderIdForConcat(Node* node); + void ReplaceConcatInputIfNeeded(Node* node, int input_idx); + bool CheckNodeUses(Node* node, Node* concat_child, Status status); + bool CheckPreviousNodeUses(Node* child, Status status, + int input_if_loop_phi = 0); + int GetPhiPredecessorsCommonId(Node* node); + + void FinalizeStringBuilders(); + void VisitNode(Node* node, BasicBlock* block); + void VisitGraph(); + + static constexpr bool kAllowAnyStringOnTheRhs = false; + + JSGraph* jsgraph_; + Schedule* schedule_; + Zone* temp_zone_; + JSHeapBroker* broker_; + unsigned int string_builder_count_ = 0; + // {blocks_to_trimmings_map_} is a map from block IDs to loop phi nodes that + // end string builders. For each such node, a trimming should be inserted at + // the beginning of the block (in EffectControlLinearizer) in order to + // properly finish the string builder (well, most things will work if the + // trimming is omitted, but adding this trimming save memory and removes the + // SlicedString indirection; the only thing that would be an issue is that the + // rest of the VM could have access to a SlicedString that is less than + // SlicedString::kMinLength characters, which may or may not break things). + ZoneVector<base::Optional<ZoneVector<Node*>>> blocks_to_trimmings_map_; + ZoneVector<Status> status_; + ZoneVector<StringBuilder> string_builders_; + // {loop_headers_} is used to keep track ot the start of each loop that the + // block currently being visited is part of. + ZoneVector<BasicBlock*> loop_headers_; +}; + +} // namespace compiler +} // namespace internal +} // namespace v8 + +#endif // V8_COMPILER_STRING_BUILDER_OPTIMIZER_H_ |