// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains tests that run only on Liftoff, and each test verifies
// that the code was compiled by Liftoff. By default, each function is first
// compiled by Liftoff and, if that fails, compilation falls back to TurboFan.
// Here we enforce that Liftoff is the tier that compiles these functions, in
// order to verify the correctness of the SIMD implementation in Liftoff.

#include "src/codegen/assembler-inl.h"
#include "src/wasm/wasm-opcodes.h"
#include "test/cctest/cctest.h"
#include "test/cctest/wasm/wasm-run-utils.h"
#include "test/common/wasm/test-signatures.h"
#include "test/common/wasm/wasm-macro-gen.h"

namespace v8 {
namespace internal {
namespace wasm {
namespace test_run_wasm_simd_liftoff {

TEST(S128Local) {
  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);
  byte temp1 = r.AllocateLocal(kWasmS128);
  r.Build({WASM_LOCAL_SET(temp1, WASM_LOCAL_GET(temp1)), WASM_ONE});
  CHECK_EQ(1, r.Call());
}

TEST(S128Global) {
  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);

  int32_t* g0 = r.builder().AddGlobal<int32_t>(kWasmS128);
  int32_t* g1 = r.builder().AddGlobal<int32_t>(kWasmS128);
  r.Build({WASM_GLOBAL_SET(1, WASM_GLOBAL_GET(0)), WASM_ONE});

  int32_t expected = 0x1234;
  for (int i = 0; i < 4; i++) {
    LANE(g0, i) = expected;
  }
  r.Call();
  for (int i = 0; i < 4; i++) {
    int32_t actual = LANE(g1, i);
    CHECK_EQ(actual, expected);
  }
}

TEST(S128Param) {
  // Test how SIMD parameters in functions are processed. There is no easy way
  // to specify a SIMD value when initializing a WasmRunner, so we manually
  // add a new function with the right signature, and call it from main.
  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);
  TestSignatures sigs;
  // We use a temp local to materialize a SIMD value, since at this point
  // Liftoff does not support any SIMD operations.
  byte temp1 = r.AllocateLocal(kWasmS128);
  WasmFunctionCompiler& simd_func = r.NewFunction(sigs.i_s());
  simd_func.Build({WASM_ONE});

  r.Build(
      {WASM_CALL_FUNCTION(simd_func.function_index(), WASM_LOCAL_GET(temp1))});

  CHECK_EQ(1, r.Call());
}

TEST(S128Return) {
  // Test how functions returning SIMD values are processed.
  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);
  TestSignatures sigs;
  WasmFunctionCompiler& simd_func = r.NewFunction(sigs.s_i());
  byte temp1 = simd_func.AllocateLocal(kWasmS128);
  simd_func.Build({WASM_LOCAL_GET(temp1)});

  r.Build({WASM_CALL_FUNCTION(simd_func.function_index(), WASM_ONE), kExprDrop,
           WASM_ONE});

  CHECK_EQ(1, r.Call());
}

TEST(REGRESS_1088273) {
  // TODO(v8:9418): This is a regression test for Liftoff, translated from a
  // mjsunit test. We do not have I64x2Mul lowering yet, so this would crash
  // on architectures that don't support SIMD 128 and require lowering; we
  // explicitly skip them.
  if (!CpuFeatures::SupportsWasmSimd128()) return;

  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);
  TestSignatures sigs;
  WasmFunctionCompiler& simd_func = r.NewFunction(sigs.s_i());
  byte temp1 = simd_func.AllocateLocal(kWasmS128);
  simd_func.Build({WASM_LOCAL_GET(temp1)});

  r.Build({WASM_SIMD_SPLAT(I8x16, WASM_I32V(0x80)),
           WASM_SIMD_SPLAT(I8x16, WASM_I32V(0x92)),
           WASM_SIMD_I16x8_EXTRACT_LANE_U(0, WASM_SIMD_OP(kExprI64x2Mul))});
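  // The expected value is pure arithmetic: i16x8.extract_lane_u returns the
  // low 16 bits of the low i64 lane of the product, and those depend only on
  // the low 16 bits of the operands: 0x8080 * 0x9292 = 0x49924900, whose low
  // half is 0x4900 = 18688.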
  CHECK_EQ(18688, r.Call());
}

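// For reference, the lane selection that the shuffle tests below rely on can
// be modeled in plain C++. This is an illustrative sketch of the
// i8x16.shuffle semantics only (the name ReferenceI8x16Shuffle is ours, and
// the tests do not call it): lane i of the result comes from lhs when
// pattern[i] < 16, and from rhs otherwise.
std::array<byte, 16> ReferenceI8x16Shuffle(
    const std::array<byte, 16>& lhs, const std::array<byte, 16>& rhs,
    const std::array<byte, 16>& pattern) {
  std::array<byte, 16> result{};
  for (int i = 0; i < 16; i++) {
    // Indices 0-15 select a lane of lhs; indices 16-31 select a lane of rhs.
    result[i] = pattern[i] < 16 ? lhs[pattern[i]] : rhs[pattern[i] - 16];
  }
  return result;
}
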
// A test to exercise logic in Liftoff's implementation of shuffle. The
// Liftoff implementation is trickier because shuffle requires adjacent
// registers on ARM/ARM64.
TEST(I8x16Shuffle) {
  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);
  // Temps to use up registers and force non-adjacent registers for shuffle.
  byte local0 = r.AllocateLocal(kWasmS128);
  byte local1 = r.AllocateLocal(kWasmS128);

  // g0 and g1 are globals that hold input values for the shuffle; g0 contains
  // the byte array [0, 1, ..., 15] and g1 contains [16, 17, ..., 31]. They
  // should never be overwritten - write only to the output global.
  byte* g0 = r.builder().AddGlobal<byte>(kWasmS128);
  byte* g1 = r.builder().AddGlobal<byte>(kWasmS128);
  for (int i = 0; i < 16; i++) {
    LANE(g0, i) = i;
    LANE(g1, i) = i + 16;
  }

  // Output global holding a kWasmS128.
  byte* output = r.builder().AddGlobal<byte>(kWasmS128);

  // With this pattern, i8x16.shuffle(lhs, rhs, pattern) copies the first 15
  // lanes of lhs into the result and puts the last lane of rhs into the last
  // lane of the result.
  std::array<byte, 16> pattern = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31}};

  // Set up locals so shuffle is called with non-adjacent registers v2 and v0.
  r.Build(
      {WASM_LOCAL_SET(local0, WASM_GLOBAL_GET(1)),  // local0 is in v0
       WASM_LOCAL_SET(local1, WASM_GLOBAL_GET(0)),  // local1 is in v1
       WASM_GLOBAL_GET(0),                          // global0 is in v2
       WASM_LOCAL_GET(local0),                      // local0 is in v0
       WASM_GLOBAL_SET(2, WASM_SIMD_I8x16_SHUFFLE_OP(kExprI8x16Shuffle, pattern,
                                                     WASM_NOP, WASM_NOP)),
       WASM_ONE});

  r.Call();

  // The shuffle pattern only changes the last element.
  for (int i = 0; i < 15; i++) {
    byte actual = LANE(output, i);
    CHECK_EQ(i, actual);
  }
  CHECK_EQ(31, LANE(output, 15));
}

// Exercise logic in Liftoff's implementation of shuffle when inputs to the
// shuffle are the same register.
TEST(I8x16Shuffle_SingleOperand) {
  WasmRunner<int32_t> r(TestExecutionTier::kLiftoff);
  byte local0 = r.AllocateLocal(kWasmS128);

  byte* g0 = r.builder().AddGlobal<byte>(kWasmS128);
  for (int i = 0; i < 16; i++) {
    LANE(g0, i) = i;
  }

  byte* output = r.builder().AddGlobal<byte>(kWasmS128);

  // This pattern reverses the first operand. Index 31 selects the last lane
  // of the second operand, but since both operands are the same, the effect
  // is that the first operand is reversed.
  std::array<byte, 16> pattern = {
      {31, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};

  // Set up the local so shuffle is called with the same register for both
  // operands.
  r.Build(
      {WASM_LOCAL_SET(local0, WASM_GLOBAL_GET(0)), WASM_LOCAL_GET(local0),
       WASM_LOCAL_GET(local0),
       WASM_GLOBAL_SET(1, WASM_SIMD_I8x16_SHUFFLE_OP(kExprI8x16Shuffle, pattern,
                                                     WASM_NOP, WASM_NOP)),
       WASM_ONE});

  r.Call();

  for (int i = 0; i < 16; i++) {
    // Check that the output is the reverse of input.
    byte actual = LANE(output, i);
    CHECK_EQ(15 - i, actual);
  }
}

// Exercise Liftoff's logic for zero-initializing stack slots. We were using an
// incorrect instruction for storing zeroes into the slot when the slot offset
// was too large to fit in the instruction as an immediate.
TEST(FillStackSlotsWithZero_CheckStartOffset) {
  WasmRunner<int64_t> r(TestExecutionTier::kLiftoff);
  // Function that takes 32 i64 arguments and returns an i64. This gets us a
  // large enough starting offset from which we spill locals:
  // start = 32 * 8 + 16 (instance) = 272, which cannot fit in a signed 9-bit
  // immediate.
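  // (On arm64, for example, the unscaled store immediate is a signed 9-bit
  // offset with range [-256, 255], so 272 is out of range.)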
  FunctionSig* sig =
      r.CreateSig<int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t,
                  int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t,
                  int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t,
                  int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t,
                  int64_t, int64_t, int64_t, int64_t, int64_t>();
  WasmFunctionCompiler& simd_func = r.NewFunction(sig);

  // We zero 16 bytes at a time using stp, so allocate locals whose total size
  // leaves a remainder: 16 (s128) + 8 (i64) = 24 bytes, with 8 left over, so
  // we also hit the case where str is used.
  simd_func.AllocateLocal(kWasmS128);
  simd_func.AllocateLocal(kWasmI64);
  simd_func.Build({WASM_I64V_1(1)});

  r.Build({WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_I64V_1(1),
           WASM_CALL_FUNCTION0(simd_func.function_index())});

  CHECK_EQ(1, r.Call());
}

}  // namespace test_run_wasm_simd_liftoff
}  // namespace wasm
}  // namespace internal
}  // namespace v8