summaryrefslogtreecommitdiff
path: root/backend/src/backend/gen_insn_selection_optimize.cpp
blob: af5ecc2ba845cef484b5572d7acabf7a2cb36d74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

#include "backend/gen_insn_selection.hpp"
#include "backend/gen_context.hpp"
#include "ir/function.hpp"
#include "ir/liveness.hpp"
#include "ir/profile.hpp"
#include "sys/cvar.hpp"
#include "sys/vector.hpp"
#include <algorithm>
#include <climits>
#include <map>

namespace gbe
{
  //helper functions
  // Build a 32-bit mask describing which elements of a register region are
  // touched when `reg` is accessed with an execution width of `execWidth`.
  //
  // The region is walked as `height` rows of `width` elements, where
  // height = execWidth / width. Rows are `vstride` elements apart and the
  // elements inside a row are `hstride` elements apart. Every visited byte
  // offset (starting at reg.nr * GEN_REG_SIZE + reg.subnr) is converted to an
  // element index, and bit (index mod 32) is set in the result.
  //
  // Two accesses yielding the same mask are treated by the callers as touching
  // the same set of elements.
  static uint32_t CalculateElements(const GenRegister& reg, uint32_t execWidth)
  {
    uint32_t elements = 0;
    const uint32_t elementSize = typeSize(reg.type);
    const uint32_t width = GenRegister::width_size(reg);
    // reg may be a source of another instruction, so its width is not
    // guaranteed to be <= execWidth; in that case height is 0 and the mask
    // stays empty.
    //assert(execWidth >= width);
    const uint32_t height = execWidth / width;
    const uint32_t vstride = GenRegister::vstride_size(reg);
    const uint32_t hstride = GenRegister::hstride_size(reg);
    uint32_t base = reg.nr * GEN_REG_SIZE + reg.subnr;
    for (uint32_t i = 0; i < height; ++i) {
      uint32_t offsetInByte = base;
      for (uint32_t j = 0; j < width; ++j) {
        const uint32_t offsetInType = offsetInByte / elementSize;
        // Only the regular layout (vstride == width * hstride) is supported:
        // when an instruction spans several registers, the following registers
        // are then visited with the same pattern as the first one, which is
        // why folding the element index into 32 bits is acceptable.
        assert(vstride == width * hstride);
        // offsetInType is regularly >= 32 because base includes
        // reg.nr * GEN_REG_SIZE. Shifting a 32-bit value by 32 or more is
        // undefined behaviour, so the wrap-around is made explicit and the
        // shift is done on an unsigned operand.
        elements |= (1u << (offsetInType & 31u));
        offsetInByte += hstride * elementSize;
      }
      base += vstride * elementSize;
    }
    return elements;
  }

  // Abstract base class of the selection optimizers defined in this file.
  // It only stores the Gen context and the feature mask handed to the
  // constructor; the actual work is done by run() in the derived classes.
  class SelOptimizer
  {
  public:
    SelOptimizer(const GenContext& ctx, uint32_t features)
      : ctx(ctx),
        features(features)
    {
    }

    // Execute the optimization pass.
    virtual void run() = 0;

    virtual ~SelOptimizer() {}

  protected:
    const GenContext &ctx;  // kept in case a pass needs it
    uint32_t features;      // bit mask tested against the SIOF_* flags
  };

  class SelBasicBlockOptimizer : public SelOptimizer
  {
  public:
    SelBasicBlockOptimizer(const GenContext& ctx,
                           const ir::Liveness::LiveOut& liveout,
                           uint32_t features,
                           SelectionBlock &bb) :
        SelOptimizer(ctx, features), bb(bb), liveout(liveout), optimized(false)
    {
    }
    ~SelBasicBlockOptimizer() {}
    virtual void run();

  private:
    // local copy propagation
    class ReplaceInfo
    {
    public:
      ReplaceInfo(SelectionInstruction& insn,
                  const GenRegister& intermedia,
                  const GenRegister& replacement) :
                  insn(insn), intermedia(intermedia), replacement(replacement)
      {
        assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD);
        assert(&(insn.dst(0)) == &intermedia);
        this->elements = CalculateElements(intermedia, insn.state.execWidth);
        replacementOverwritten = false;
      }
      ~ReplaceInfo()
      {
        this->toBeReplaceds.clear();
      }

      SelectionInstruction& insn;
      const GenRegister& intermedia;
      uint32_t elements;
      const GenRegister& replacement;
      set<GenRegister*> toBeReplaceds;
      bool replacementOverwritten;
      GBE_CLASS(ReplaceInfo);
    };
    typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap;
    ReplaceInfoMap replaceInfoMap;
    void doLocalCopyPropagation();
    void addToReplaceInfoMap(SelectionInstruction& insn);
    void changeInsideReplaceInfoMap(const SelectionInstruction& insn, GenRegister& var);
    void removeFromReplaceInfoMap(const SelectionInstruction& insn, const GenRegister& var);
    void doReplacement(ReplaceInfo* info);
    bool CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var);
    void cleanReplaceInfoMap();
    void doNegAddOptimization(SelectionInstruction &insn);

    SelectionBlock &bb;
    const ir::Liveness::LiveOut& liveout;
    bool optimized;
    static const size_t MaxTries = 1;   //the max times of optimization try
  };

  // Commit one copy propagation: every operand collected in
  // info->toBeReplaceds is rewritten from info->replacement, then the copy
  // instruction is removed from the block and the pass is marked as having
  // changed something.
  void SelBasicBlockOptimizer::doReplacement(ReplaceInfo* info)
  {
    const GenRegister& source = info->replacement;
    for (auto it = info->toBeReplaceds.begin(); it != info->toBeReplaceds.end(); ++it) {
      GenRegister* target = *it;
      GenRegister::propagateRegister(*target, source);
    }

    bb.insnList.erase(&(info->insn));
    optimized = true;
  }

  void SelBasicBlockOptimizer::cleanReplaceInfoMap()
  {
    for (auto& pair : replaceInfoMap) {
      ReplaceInfo* info = pair.second;
      doReplacement(info);
      delete info;
    }
    replaceInfoMap.clear();
  }

  // Update the candidate map for a register `var` written by `insn`.
  //
  //  - The candidate whose intermedia lives in the same IR register is finished:
  //    if the write hits exactly the same physical location and CanBeReplaced()
  //    agrees, the collected replacements are committed; in every case the
  //    candidate is removed from the map and freed.
  //  - Every candidate whose replacement lives in the same IR register is
  //    flagged as overwritten so that it is no longer used for propagation.
  //
  // The whole map is always scanned. Returning right after the intermedia
  // candidate was dropped would leave candidates that sort after it unflagged
  // although their replacement has just been written.
  void SelBasicBlockOptimizer::removeFromReplaceInfoMap(const SelectionInstruction& insn, const GenRegister& var)
  {
    ReplaceInfoMap::iterator pos = replaceInfoMap.begin();
    while (pos != replaceInfoMap.end()) {
      ReplaceInfo* info = pos->second;
      if (info->intermedia.reg() == var.reg()) {   //intermedia is overwritten
        if (info->intermedia.quarter == var.quarter && info->intermedia.subnr == var.subnr && info->intermedia.nr == var.nr) {
          // The intermedia must be fully overwritten; the two instructions may
          // run under different predication states, which CanBeReplaced checks.
          if (CanBeReplaced(info, insn, var))
            doReplacement(info);
        }
        // Post-increment keeps a valid iterator across the erase.
        replaceInfoMap.erase(pos++);
        delete info;
        continue;
      }
      if (info->replacement.reg() == var.reg()) {  //replacement is overwritten
        // Several candidates may share the overwritten register as their
        // replacement (with different physical subnr), e.g.
        //   mov %10, %9.0
        //   mov %11, %9.1
        //   ...
        //   mov %9, %8
        // Both %9.0 and %9.1 are registered as replacements after the first two
        // instructions, and both must be flagged when %9 is written.
        info->replacementOverwritten = true;
      }
      ++pos;
    }
  }

  // Register `insn` (a MOV, or an ADD acting as a negating copy) as a copy
  // propagation candidate keyed by the IR register of its destination.
  //
  // For an ADD the copied operand is the one that is not the immediate: src(1)
  // when src(0) is an immediate, src(0) otherwise.
  //
  // Nothing is registered when source and destination differ in type or
  // register file, when the source has a non-zero horizontal stride different
  // from the destination's, or when the destination is live out of the block.
  //
  // The instruction itself is never modified here. The operand is selected
  // through a pointer because assigning through a GenRegister reference would
  // overwrite the instruction's src(0) instead of selecting src(1).
  void SelBasicBlockOptimizer::addToReplaceInfoMap(SelectionInstruction& insn)
  {
    assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD);
    const GenRegister* src = &insn.src(0);
    if (insn.opcode == SEL_OP_ADD && src->file == GEN_IMMEDIATE_VALUE)
      src = &insn.src(1);

    const GenRegister& dst = insn.dst(0);
    if (src->type != dst.type || src->file != dst.file)
      return;

    if (src->hstride != GEN_HORIZONTAL_STRIDE_0 && src->hstride != dst.hstride)
      return;

    // A register that is live out of the block must keep its defining copy.
    if (liveout.find(dst.reg()) != liveout.end())
      return;

    // ReplaceInfo keeps references to dst and *src, both stored inside insn.
    ReplaceInfo* info = new ReplaceInfo(insn, dst, *src);
    replaceInfoMap[dst.reg()] = info;
  }

  bool SelBasicBlockOptimizer::CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var)
  {
    //some conditions here are very strict, while some conditions are very light
    //the reason is that i'm unable to find a perfect condition now in the first version
    //need to refine the conditions when debugging/optimizing real kernels

    if (insn.opcode == SEL_OP_BSWAP) //should remove once bswap issue is fixed
      return false;

    //the src modifier is not supported by the following instructions
    if(info->replacement.negation || info->replacement.absolute)
    {
      switch(insn.opcode)
      {
        case SEL_OP_MATH:
        {
          switch(insn.extra.function)
          {
            case GEN_MATH_FUNCTION_INT_DIV_QUOTIENT:
            case GEN_MATH_FUNCTION_INT_DIV_REMAINDER:
            case GEN_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
              return false;
            default:
              break;
          }

          break;
        }
        case SEL_OP_CBIT:
        case SEL_OP_FBH:
        case SEL_OP_FBL:
        case SEL_OP_BRC:
        case SEL_OP_BRD:
        case SEL_OP_BFREV:
        case SEL_OP_LZD:
        case SEL_OP_HADD:
        case SEL_OP_RHADD:
          return false;
        default:
          break;
      }
    }

    if (insn.isWrite() || insn.isRead()) //register in selection vector
      return false;

    if (features & SIOF_LOGICAL_SRCMOD)
      if ((insn.opcode == SEL_OP_AND || insn.opcode == SEL_OP_NOT || insn.opcode == SEL_OP_OR || insn.opcode == SEL_OP_XOR) &&
            (info->replacement.absolute || info->replacement.negation))
        return false;

    if (features & SIOF_OP_MOV_LONG_REG_RESTRICT && insn.opcode == SEL_OP_MOV) {
      const GenRegister& dst = insn.dst(0);
      if (dst.isint64() && !info->replacement.isint64() && info->elements != CalculateElements(info->replacement, insn.state.execWidth))
        return false;
    }

    if (info->replacementOverwritten)
      return false;

    if (info->insn.state.noMask == 0 && insn.state.noMask == 1)
      return false;

    // If insn is in no prediction state, it will overwrite the info insn.
    if (info->insn.state.predicate != insn.state.predicate && insn.state.predicate != GEN_PREDICATE_NONE)
      return false;

    if (info->insn.state.inversePredicate != insn.state.inversePredicate)
      return false;

    if (info->intermedia.type == var.type && info->intermedia.quarter == var.quarter &&
        info->intermedia.subnr == var.subnr && info->intermedia.nr == var.nr) {
      uint32_t elements = CalculateElements(var, insn.state.execWidth);  //considering width, hstrid, vstrid and execWidth
      if (info->elements == elements)
        return true;
    }

    return false;
  }

  void SelBasicBlockOptimizer::changeInsideReplaceInfoMap(const SelectionInstruction& insn, GenRegister& var)
  {
    ReplaceInfoMap::iterator it = replaceInfoMap.find(var.reg());
    if (it != replaceInfoMap.end()) {    //same ir register
      ReplaceInfo* info = it->second;
      if (CanBeReplaced(info, insn, var)) {
        info->toBeReplaceds.insert(&var);
      } else {
        //if it is the same ir register, but could not be replaced for some reason,
        //that means we could not remove MOV instruction, and so no replacement,
        //so we'll remove the info for this case.
        replaceInfoMap.erase(it);
        delete info;
      }
    }
  }

  void SelBasicBlockOptimizer::doLocalCopyPropagation()
  {
    for (SelectionInstruction &insn : bb.insnList) {
      for (uint8_t i = 0; i < insn.srcNum; ++i)
        changeInsideReplaceInfoMap(insn, insn.src(i));

      for (uint8_t i = 0; i < insn.dstNum; ++i)
        removeFromReplaceInfoMap(insn, insn.dst(i));

      if (insn.opcode == SEL_OP_MOV)
        addToReplaceInfoMap(insn);

      doNegAddOptimization(insn);
    }
    cleanReplaceInfoMap();
  }

  /* LLVM transforms Mad(a, -b, c) into
       Add b, -b, 0
       Mad val, a, b, c
     Gen supports the negation source modifier, so mad(a, -b, c) is natively
     supported. The same applies to similar instruction sequences.
     Such an Add is handled like "mov b, -b", i.e. as a Mov candidate of the
     local copy propagation.
  */
  void SelBasicBlockOptimizer::doNegAddOptimization(SelectionInstruction &insn) {
    if (insn.opcode != SEL_OP_ADD)
      return;

    // True when `negated` carries the negation modifier and `other` is an
    // immediate whose float view compares equal to zero.
    // NOTE(review): the zero test reads the immediate through value.f whatever
    // the register type is - confirm this is adequate for integer and 64-bit
    // immediates.
    const auto isNegatedPlusZero = [](const GenRegister& negated, const GenRegister& other) {
      return negated.negation && other.file == GEN_IMMEDIATE_VALUE && other.value.f == 0.0f;
    };

    const GenRegister& first = insn.src(0);
    const GenRegister& second = insn.src(1);
    if (isNegatedPlusZero(first, second) || isNegatedPlusZero(second, first))
      addToReplaceInfoMap(insn);
  }

  // Run the block-level passes for at most MaxTries rounds, stopping as soon
  // as a round performs no replacement.
  void SelBasicBlockOptimizer::run()
  {
    size_t round = 0;
    while (round < MaxTries) {
      optimized = false;

      doLocalCopyPropagation();
      //doOtherLocalOptimization();

      if (!optimized) {
        //nothing was optimized during this round, no point in retrying
        break;
      }
      ++round;
    }
  }

  // Placeholder for optimizations that span several basic blocks.
  class SelGlobalOptimizer : public SelOptimizer
  {
  public:
    SelGlobalOptimizer(const GenContext& ctx, uint32_t features)
      : SelOptimizer(ctx, features)
    {
    }
    ~SelGlobalOptimizer() {}
    virtual void run();
  };

  // No global optimization is implemented yet.
  void SelGlobalOptimizer::run()
  {
  }

  void Selection::optimize()
  {
    //do basic block level optimization
    for (SelectionBlock &block : *blockList) {
      SelBasicBlockOptimizer bbopt(getCtx(), getCtx().getLiveOut(block.bb), opt_features, block);
      bbopt.run();
    }

    //do global optimization

  }

  // Number every selection instruction in block order. IDs start at 0 and
  // advance by 2 from one instruction to the next.
  void Selection::addID()
  {
    uint32_t nextID = 0;
    for (auto &block : *blockList) {
      for (auto &insn : block.insnList) {
        insn.ID = nextID;
        nextID += 2;
      }
    }
  }
} /* namespace gbe */