path: root/deps/v8/src/compiler/revectorizer.h
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_REVECTORIZER_H_
#define V8_COMPILER_REVECTORIZER_H_

// The Revectorizer is an optimization that promotes pairs of simd128 nodes to
// new simd256 nodes, accelerated by the wider vectors available in hardware,
// e.g. the YMM registers of the AVX2 instruction set, when possible and
// beneficial. The main algorithm is based on the Superword Level Parallelism
// (SLP) vectorization technique.
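//
// As an illustrative sketch (node names simplified), two independent simd128
// additions whose results are stored to consecutive addresses,
//
//   a = I32x4Add(x0, y0);   Store(base, 0,  a);
//   b = I32x4Add(x1, y1);   Store(base, 16, b);
//
// can be packed and replaced by one simd256 addition and one 32-byte store:
//
//   c = I32x8Add(x, y);     Store(base, 0, c);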

#include <vector>

#include "src/base/small-vector.h"
#include "src/compiler/graph.h"
#include "src/compiler/linear-scheduler.h"
#include "src/compiler/machine-graph.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-marker.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/schedule.h"
#include "src/zone/zone-containers.h"

namespace v8 {
namespace internal {
namespace compiler {

struct V8_EXPORT_PRIVATE MemoryOffsetComparer {
  bool operator()(const Node* lhs, const Node* rhs) const;
};

using StoreNodeSet = ZoneSet<Node*, MemoryOffsetComparer>;

// A PackNode consists of a fixed number of isomorphic simd128 nodes which can
// execute in parallel and be converted to a single simd256 node later. The
// nodes in a PackNode must be schedulable in the same basic block and mutually
// independent.
class PackNode final : public NON_EXPORTED_BASE(ZoneObject) {
 public:
  explicit PackNode(Zone* zone, const ZoneVector<Node*>& node_group)
      : nodes_(node_group.cbegin(), node_group.cend(), zone),
        operands_(zone),
        revectorized_node_(nullptr) {}
  const ZoneVector<Node*>& Nodes() const { return nodes_; }
  bool IsSame(const ZoneVector<Node*>& node_group) const {
    return nodes_ == node_group;
  }
  Node* RevectorizedNode() { return revectorized_node_; }
  void SetRevectorizedNode(Node* node) { revectorized_node_ = node; }
  // Returns the operand at the given index of this PackNode.
  PackNode* GetOperand(size_t index) {
    DCHECK_LT(index, operands_.size());
    return operands_[index];
  }

  ZoneVector<PackNode*>::size_type GetOperandsSize() const {
    return operands_.size();
  }

  void SetOperand(size_t index, PackNode* pnode) {
    if (operands_.size() < index + 1) operands_.resize(index + 1);
    operands_[index] = pnode;
  }

  void Print() const;

 private:
  ZoneVector<Node*> nodes_;
  ZoneVector<PackNode*> operands_;
  Node* revectorized_node_;
};
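
// Illustrative sketch of how a PackNode is wired up (hypothetical names; the
// real wiring is done by SLPTree during tree construction):
//
//   PackNode* adds = zone->New<PackNode>(zone, add_group);
//   adds->SetOperand(0, left_loads);   // PackNode of the left-hand inputs.
//   adds->SetOperand(1, right_loads);  // PackNode of the right-hand inputs.
//   adds->SetRevectorizedNode(i32x8_add_node);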

// An auxiliary tree structure with a set of PackNodes, based on the Superword
// Level Parallelism (SLP) vectorization technique. The BuildTree method starts
// from a selected root, e.g. a group of consecutive stores, and extends
// through the value inputs, creating new PackNodes while the inputs are valid,
// or concluding that the current PackNode is a leaf and terminating the tree.
// Below is an example of an SLPTree where the loads and stores in each
// PackNode are all consecutive.
// [Load0, Load1]  [Load2, Load3]
//           \       /
//          [Add0, Add1]
//                |
//         [Store0, Store1]
class SLPTree : public NON_EXPORTED_BASE(ZoneObject) {
 public:
  explicit SLPTree(Zone* zone, Graph* graph)
      : zone_(zone),
        graph_(graph),
        root_(nullptr),
        on_stack_(zone),
        stack_(zone),
        node_to_packnode_(zone) {
    scheduler_ = zone->New<LinearScheduler>(zone, graph);
  }

  PackNode* BuildTree(const ZoneVector<Node*>& roots);
  void DeleteTree();

  PackNode* GetPackNode(Node* node);

  void Print(const char* info);

  template <typename FunctionType>
  void ForEach(FunctionType callback);

  Node* GetEarlySchedulePosition(Node* node) {
    return scheduler_->GetEarlySchedulePosition(node);
  }

 private:
  friend class LinearScheduler;

  // This is the recursive part of BuildTree.
  PackNode* BuildTreeRec(const ZoneVector<Node*>& node_group, unsigned depth);

  // Base case: create a new PackNode and return it.
  PackNode* NewPackNode(const ZoneVector<Node*>& node_group);

  // Recursive case: create a new PackNode and call BuildTreeRec recursively.
  PackNode* NewPackNodeAndRecurs(const ZoneVector<Node*>& node_group,
                                 int start_index, int count, unsigned depth);

  bool CanBePacked(const ZoneVector<Node*>& node_group);

  Graph* graph() const { return graph_; }
  Zone* zone() const { return zone_; }

  // Node stack operations.
  void PopStack();
  void PushStack(const ZoneVector<Node*>& node_group);
  void ClearStack();
  bool OnStack(Node* node);
  bool AllOnStack(const ZoneVector<Node*>& node_group);
  bool StackTopIsPhi();

  void TryReduceLoadChain(const ZoneVector<Node*>& loads);
  bool IsSideEffectFreeLoad(const ZoneVector<Node*>& node_group);
  bool SameBasicBlock(Node* node0, Node* node1) {
    return scheduler_->SameBasicBlock(node0, node1);
  }

  Zone* const zone_;
  Graph* const graph_;
  PackNode* root_;
  LinearScheduler* scheduler_;
  ZoneSet<Node*> on_stack_;
  ZoneStack<ZoneVector<Node*>> stack_;
  // Maps a node to its PackNode.
  ZoneUnorderedMap<Node*, PackNode*> node_to_packnode_;
  static constexpr size_t RecursionMaxDepth = 1000;
};
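
// A minimal usage sketch of SLPTree (hypothetical driver code; in practice the
// tree is built and consumed by the Revectorizer below):
//
//   SLPTree* tree = zone->New<SLPTree>(zone, graph);
//   PackNode* root = tree->BuildTree(consecutive_stores);
//   if (root != nullptr) {
//     tree->Print("SLP tree built from consecutive stores");
//     // ... replace the packed simd128 nodes with simd256 nodes ...
//     tree->DeleteTree();
//   }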

// The Revectorizer pass first collects seeds, i.e. valid groups of consecutive
// stores, to use as roots for building SLPTrees. If an SLPTree is built
// successfully, it estimates the cost of the 256-bit transformation for each
// PackNode and performs the final revectorization if it is beneficial.
class V8_EXPORT_PRIVATE Revectorizer final
    : public NON_EXPORTED_BASE(ZoneObject) {
 public:
  Revectorizer(Zone* zone, Graph* graph, MachineGraph* mcgraph)
      : zone_(zone),
        graph_(graph),
        mcgraph_(mcgraph),
        group_of_stores_(zone),
        support_simd256_(false) {
    DetectCPUFeatures();
    slp_tree_ = zone_->New<SLPTree>(zone, graph);
  }

  void DetectCPUFeatures();
  bool TryRevectorize(const char* name);

 private:
  void CollectSeeds();

  bool ReduceStoreChains(ZoneMap<Node*, StoreNodeSet>* store_chains);
  bool ReduceStoreChain(const ZoneVector<Node*>& stores);

  void PrintStores(ZoneMap<Node*, StoreNodeSet>* store_chains);
  Zone* zone() const { return zone_; }
  Graph* graph() const { return graph_; }
  MachineGraph* mcgraph() const { return mcgraph_; }

  PackNode* GetPackNode(Node* node) const {
    return slp_tree_->GetPackNode(node);
  }

  bool DecideVectorize();

  void SetEffectInput(PackNode* pnode, int index, Node*& input);
  void SetMemoryOpInputs(base::SmallVector<Node*, 2>& inputs, PackNode* pnode,
                         int index);
  Node* VectorizeTree(PackNode* pnode);

  Zone* const zone_;
  Graph* const graph_;
  MachineGraph* const mcgraph_;
  ZoneMap<Node*, ZoneMap<Node*, StoreNodeSet>*> group_of_stores_;
  SLPTree* slp_tree_;

  bool support_simd256_;
};
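
// A minimal usage sketch (hypothetical driver code; the pass is normally
// invoked from the compiler pipeline):
//
//   Revectorizer revec(zone, graph, mcgraph);
//   if (revec.TryRevectorize("example-function")) {
//     // The graph now contains simd256 nodes where the 256-bit
//     // transformation was estimated to be beneficial.
//   }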

}  // namespace compiler
}  // namespace internal
}  // namespace v8

#endif  // V8_COMPILER_REVECTORIZER_H_