1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
|
/* Test that the vdiv and vdivq floating-point intrinsics work correctly. */
/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */
#include <arm_neon.h>
/* IEEE special values obtained from compiler builtins: a float infinity,
   a double infinity, and a quiet NaN produced by the double-precision
   builtin.  */
#define FLT_INFINITY (__builtin_inff ())
#define DBL_INFINITY (__builtin_inf ())
#define NAN (__builtin_nan (""))
/* Double-precision literals for pi, pi/4, sqrt(2) and 1/sqrt(2); they are
   used below as dividends, divisors and expected quotients.  */
#define PI 3.141592653589793
#define PI_4 0.7853981633974483
#define SQRT2 1.4142135623730951
#define SQRT1_2 0.7071067811865475
/* Dividends.  Entry n of this set is divided by TESTBn and the quotient is
   compared with ANSWn (see RUN_TEST).  TESTA8 and INFINITY are deliberately
   not defined here: every test function defines them to the float or the
   double flavour immediately before it builds its arrays.  */
/* 0-3: +/-pi, paired with divisors +/-4.0 (all four sign combinations).  */
#define TESTA0 PI
#define TESTA1 -PI
#define TESTA2 PI
#define TESTA3 -PI
/* 4-7: +/-1.0, paired with divisors +/-sqrt(2).  */
#define TESTA4 1.0
#define TESTA5 -1.0
#define TESTA6 1.0
#define TESTA7 -1.0
/* 2^25 + 1.  A float has a 24-bit significand, so this integer is not
   exactly representable in single precision and is rounded when it is
   stored into a float32_t array.  */
#define TESTA8_FLT 33554433
/* 2^54 + 1.  A double has a 53-bit significand, so this integer is not
   exactly representable in double precision and is rounded when it is
   stored into a float64_t array.  */
#define TESTA8_DBL 18014398509481985
/* 9-11: sign variations of the large value selected by TESTA8.  */
#define TESTA9 -TESTA8
#define TESTA10 TESTA8
#define TESTA11 -TESTA8
/* 12-16: NaN and infinity handling (NaN dividend, NaN divisor, infinite
   dividends with finite and infinite divisors).  */
#define TESTA12 NAN
#define TESTA13 1.0
#define TESTA14 INFINITY
#define TESTA15 -INFINITY
#define TESTA16 INFINITY
/* 17-19: finite dividends whose divisors are +inf, -inf and 0.  */
#define TESTA17 9.0
#define TESTA18 11.0
#define TESTA19 13.0
/* Divisors.  TESTBn is the divisor applied to dividend TESTAn; INFINITY is
   defined per test function to the float or the double infinity.  */
/* 0-3: +/-4.0.  */
#define TESTB0 4.0
#define TESTB1 4.0
#define TESTB2 -4.0
#define TESTB3 -4.0
/* 4-7: +/-sqrt(2).  */
#define TESTB4 SQRT2
#define TESTB5 SQRT2
#define TESTB6 -SQRT2
#define TESTB7 -SQRT2
/* 8-11: +/-2.0, used with the large TESTA8-based dividends.  */
#define TESTB8 2.0
#define TESTB9 2.0
#define TESTB10 -2.0
#define TESTB11 -2.0
/* 12-15: finite divisors and one NaN divisor for the NaN/infinity
   dividends.  */
#define TESTB12 3.0
#define TESTB13 NAN
#define TESTB14 5.0
#define TESTB15 7.0
/* 16-18: infinite divisors; 19: division by zero.  */
#define TESTB16 INFINITY
#define TESTB17 INFINITY
#define TESTB18 -INFINITY
#define TESTB19 0
/* Expected quotients.  ANSWn is the value TESTAn / TESTBn is compared
   against by FP_equals.  ANSW8 and INFINITY are defined per test function
   to the float or the double flavour.  */
/* 0-3: +/-pi / +/-4.0.  */
#define ANSW0 PI_4
#define ANSW1 -PI_4
#define ANSW2 -PI_4
#define ANSW3 PI_4
/* 4-7: +/-1.0 / +/-sqrt(2).  */
#define ANSW4 SQRT1_2
#define ANSW5 -SQRT1_2
#define ANSW6 -SQRT1_2
#define ANSW7 SQRT1_2
/* 2^24 and 2^53: half of 2^25 and 2^54, i.e. the quotient expected once
   TESTA8_FLT / TESTA8_DBL has lost its low "+ 1" bit to rounding
   (assuming the default round-to-nearest mode).  */
#define ANSW8_FLT 16777216
#define ANSW8_DBL 9007199254740992
#define ANSW9 -ANSW8
#define ANSW10 -ANSW8
#define ANSW11 ANSW8
/* 12-13: a NaN operand gives NaN; 14-15: infinity / finite keeps the
   infinity; 16: infinity / infinity gives NaN.  */
#define ANSW12 NAN
#define ANSW13 NAN
#define ANSW14 INFINITY
#define ANSW15 -INFINITY
#define ANSW16 NAN
/* 17-18: finite / +/-infinity gives zero.  FP_equals compares with ==, so
   the sign of the zero is not checked.  19: finite / 0 gives infinity.  */
#define ANSW17 0
#define ANSW18 0
#define ANSW19 INFINITY
/* Two-level token pasting.  CONCAT1 forwards to CONCAT so that its
   arguments are macro-expanded before ## glues them together.  */
#define CONCAT(a, b) a##b
#define CONCAT1(a, b) CONCAT (a, b)
/* Infix of the intrinsic name selected by the register width argument:
   "_" for 64 (as in vdiv_f32) and "q_" for 128 (as in vdivq_f32).  */
#define REG_INFEX64 _
#define REG_INFEX128 q_
#define REG_INFEX(reg_len) REG_INFEX##reg_len
/* Full intrinsic suffix, e.g. POSTFIX (64, 32) -> _f32 and
   POSTFIX (128, 64) -> q_f64.  */
#define POSTFIX(reg_len, data_len) \
CONCAT1 (REG_INFEX (reg_len), f##data_len)
/* Scalar element type for an element width of 32 or 64.  DATA_TYPE is not
   referenced in the visible part of this file.  */
#define DATA_TYPE_32 float
#define DATA_TYPE_64 double
#define DATA_TYPE(data_len) DATA_TYPE_##data_len
/* Comparison tolerance for an element width of 32 or 64: the machine
   epsilon of float or double respectively.  */
#define EPSILON_32 __FLT_EPSILON__
#define EPSILON_64 __DBL_EPSILON__
#define EPSILON(data_len) EPSILON_##data_len
/* Names of the load and divide intrinsics for a register/element width
   pair, e.g. LOAD_INST (64, 32) -> vld1_f32, DIV_INST (128, 64) ->
   vdivq_f64.  */
#define LOAD_INST(reg_len, data_len) \
CONCAT1 (vld1, POSTFIX (reg_len, data_len))
#define DIV_INST(reg_len, data_len) \
CONCAT1 (vdiv, POSTFIX (reg_len, data_len))
/* Absolute value through the double-precision builtin; a float argument
   is promoted to double first.  */
#define ABS(a) __builtin_fabs (a)
/* Non-zero when A is a NaN.  */
#define ISNAN(a) __builtin_isnan (a)
/* FP_equals (a, b, epsilon): non-zero when A and B count as the same
   result, which is the case when any of the following holds:
     - they compare equal with == (this also covers equal infinities and
       treats +0 and -0 as equal);
     - both are NaN (NaN never compares equal to itself, so it needs its
       own clause);
     - their absolute difference is strictly below EPSILON.
   Every parameter is parenthesized in the expansion so that expression
   arguments such as "x + y" keep their intended grouping.  The arguments
   are evaluated more than once, so they must be free of side effects.  */
#define FP_equals(a, b, epsilon) \
  ( \
    ((a) == (b)) \
    || (ISNAN (a) && ISNAN (b)) \
    || (ABS ((a) - (b)) < (epsilon)) \
  )
/* Empty volatile asm with a "memory" clobber.  It emits no instructions but
   acts as a compiler-level memory barrier; presumably used here to keep the
   compiler from folding the loads, the division and the comparison at
   compile time.  */
#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
/* RUN_TEST: divide one row of dividends by one row of divisors and check
   every lane of the quotient.

   a, b, c     vector lvalues used as scratch: dividend (overwritten with the
               quotient), divisor and expected result.
   testseta    2-D array of dividends; row COUNT is loaded into A.
   testsetb    2-D array of divisors; row COUNT is loaded into B.
   answset     2-D array of expected quotients; row COUNT is loaded into C.
   count       index of the row to test.
   reg_len     register width, 64 or 128; selects the plain or "q" intrinsic.
   data_len    element width, 32 or 64; selects the f32 or f64 intrinsic and
               the matching epsilon.
   n           number of lanes in one vector.

   On the first lane that fails FP_equals the macro executes "return 1" in
   the ENCLOSING function, so it may only be used inside a function that
   returns an integer.  The body is wrapped in do { } while (0) so that the
   macro behaves as a single statement, including under if/else.  */
#define RUN_TEST(a, b, c, testseta, testsetb, answset, count, \
                 reg_len, data_len, n) \
  do \
    { \
      int i; \
      INHIB_OPTIMIZATION; \
      (a) = LOAD_INST (reg_len, data_len) ((testseta)[(count)]); \
      (b) = LOAD_INST (reg_len, data_len) ((testsetb)[(count)]); \
      (c) = LOAD_INST (reg_len, data_len) ((answset)[(count)]); \
      INHIB_OPTIMIZATION; \
      (a) = DIV_INST (reg_len, data_len) ((a), (b)); \
      for (i = 0; i < (n); i++) \
        { \
          INHIB_OPTIMIZATION; \
          if (!FP_equals ((a) [i], (c) [i], EPSILON (data_len))) \
            return 1; \
        } \
    } \
  while (0)
extern void abort (void);
/* Select the single-precision flavour of the width-dependent test values
   (TESTA8, ANSW8, INFINITY) for the arrays built in test_vdiv_f32.  */
#define TESTA8 TESTA8_FLT
#define ANSW8 ANSW8_FLT
#define INFINITY FLT_INFINITY
/* Check vdiv_f32: two float lanes per vector.
   The twenty TESTA/TESTB/ANSW values are packed as ten rows of two lanes.
   Each row is loaded, divided and compared lane by lane by RUN_TEST.
   Returns 1 (from inside RUN_TEST) on the first mismatching lane and 0
   when every row matches.  */
int
test_vdiv_f32 ()
{
int count;
/* Scratch vectors for RUN_TEST: dividend/quotient, divisor, expected.  */
float32x2_t a;
float32x2_t b;
float32x2_t c;
/* Dividends.  */
float32_t testseta[10][2] = {
{ TESTA0, TESTA1 }, { TESTA2, TESTA3 },
{ TESTA4, TESTA5 }, { TESTA6, TESTA7 },
{ TESTA8, TESTA9 }, { TESTA10, TESTA11 },
{ TESTA12, TESTA13 }, { TESTA14, TESTA15 },
{ TESTA16, TESTA17 }, { TESTA18, TESTA19 }
};
/* Divisors.  */
float32_t testsetb[10][2] = {
{ TESTB0, TESTB1 }, { TESTB2, TESTB3 },
{ TESTB4, TESTB5 }, { TESTB6, TESTB7 },
{ TESTB8, TESTB9 }, { TESTB10, TESTB11 },
{ TESTB12, TESTB13 }, { TESTB14, TESTB15 },
{ TESTB16, TESTB17 }, { TESTB18, TESTB19 }
};
/* Expected quotients.  */
float32_t answset[10][2] = {
{ ANSW0, ANSW1 }, { ANSW2, ANSW3 },
{ ANSW4, ANSW5 }, { ANSW6, ANSW7 },
{ ANSW8, ANSW9 }, { ANSW10, ANSW11 },
{ ANSW12, ANSW13 }, { ANSW14, ANSW15 },
{ ANSW16, ANSW17 }, { ANSW18, ANSW19 }
};
/* One RUN_TEST expansion inside a loop: the dg-final directive after this
   function expects exactly one matching fdiv instruction.  */
for (count = 0; count < 10; count++)
{
RUN_TEST (a, b, c, testseta, testsetb, answset, count, 64, 32, 2);
}
return 0;
}
/* { dg-final { scan-assembler-times "fdiv\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
/* Switch the width-dependent test values (TESTA8, ANSW8, INFINITY) to
   their double-precision flavour for test_vdiv_f64.  */
#undef TESTA8
#undef ANSW8
#undef INFINITY
#define TESTA8 TESTA8_DBL
#define ANSW8 ANSW8_DBL
#define INFINITY DBL_INFINITY
/* Check vdiv_f64: a single double lane per vector.
   The twenty TESTA/TESTB/ANSW values are packed as twenty rows of one
   lane.  Each row is loaded, divided and compared by RUN_TEST.
   Returns 1 (from inside RUN_TEST) on the first mismatch and 0 when every
   row matches.  */
int
test_vdiv_f64 ()
{
int count;
/* Scratch vectors for RUN_TEST: dividend/quotient, divisor, expected.  */
float64x1_t a;
float64x1_t b;
float64x1_t c;
/* Dividends.  */
float64_t testseta[20][1] = {
{ TESTA0 }, { TESTA1 }, { TESTA2 }, { TESTA3 },
{ TESTA4 }, { TESTA5 }, { TESTA6 }, { TESTA7 },
{ TESTA8 }, { TESTA9 }, { TESTA10 }, { TESTA11 },
{ TESTA12 }, { TESTA13 }, { TESTA14 }, { TESTA15 },
{ TESTA16 }, { TESTA17 }, { TESTA18 }, { TESTA19 }
};
/* Divisors.  */
float64_t testsetb[20][1] = {
{ TESTB0 }, { TESTB1 }, { TESTB2 }, { TESTB3 },
{ TESTB4 }, { TESTB5 }, { TESTB6 }, { TESTB7 },
{ TESTB8 }, { TESTB9 }, { TESTB10 }, { TESTB11 },
{ TESTB12 }, { TESTB13 }, { TESTB14 }, { TESTB15 },
{ TESTB16 }, { TESTB17 }, { TESTB18 }, { TESTB19 }
};
/* Expected quotients.  */
float64_t answset[20][1] = {
{ ANSW0 }, { ANSW1 }, { ANSW2 }, { ANSW3 },
{ ANSW4 }, { ANSW5 }, { ANSW6 }, { ANSW7 },
{ ANSW8 }, { ANSW9 }, { ANSW10 }, { ANSW11 },
{ ANSW12 }, { ANSW13 }, { ANSW14 }, { ANSW15 },
{ ANSW16 }, { ANSW17 }, { ANSW18 }, { ANSW19 }
};
/* One RUN_TEST expansion inside a loop: the dg-final directive after this
   function expects exactly one matching scalar fdiv instruction.  */
for (count = 0; count < 20; count++)
{
RUN_TEST (a, b, c, testseta, testsetb, answset, count, 64, 64, 1);
}
return 0;
}
/* { dg-final { scan-assembler-times "fdiv\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
/* Switch the width-dependent test values (TESTA8, ANSW8, INFINITY) back to
   their single-precision flavour for test_vdivq_f32.  */
#undef TESTA8
#undef ANSW8
#undef INFINITY
#define TESTA8 TESTA8_FLT
#define ANSW8 ANSW8_FLT
#define INFINITY FLT_INFINITY
/* Check vdivq_f32: four float lanes per vector.
   The twenty TESTA/TESTB/ANSW values are packed as five rows of four
   lanes.  Each row is loaded, divided and compared lane by lane by
   RUN_TEST.  Returns 1 (from inside RUN_TEST) on the first mismatching
   lane and 0 when every row matches.  */
int
test_vdivq_f32 ()
{
int count;
/* Scratch vectors for RUN_TEST: dividend/quotient, divisor, expected.  */
float32x4_t a;
float32x4_t b;
float32x4_t c;
/* Dividends.  */
float32_t testseta[5][4] = {
{ TESTA0, TESTA1, TESTA2, TESTA3 },
{ TESTA4, TESTA5, TESTA6, TESTA7 },
{ TESTA8, TESTA9, TESTA10, TESTA11 },
{ TESTA12, TESTA13, TESTA14, TESTA15 },
{ TESTA16, TESTA17, TESTA18, TESTA19 }
};
/* Divisors.  */
float32_t testsetb[5][4] = {
{ TESTB0, TESTB1, TESTB2, TESTB3 },
{ TESTB4, TESTB5, TESTB6, TESTB7 },
{ TESTB8, TESTB9, TESTB10, TESTB11 },
{ TESTB12, TESTB13, TESTB14, TESTB15 },
{ TESTB16, TESTB17, TESTB18, TESTB19 }
};
/* Expected quotients.  */
float32_t answset[5][4] = {
{ ANSW0, ANSW1, ANSW2, ANSW3 },
{ ANSW4, ANSW5, ANSW6, ANSW7 },
{ ANSW8, ANSW9, ANSW10, ANSW11 },
{ ANSW12, ANSW13, ANSW14, ANSW15 },
{ ANSW16, ANSW17, ANSW18, ANSW19 }
};
/* One RUN_TEST expansion inside a loop: the dg-final directive after this
   function expects exactly one matching fdiv instruction.  */
for (count = 0; count < 5; count++)
{
RUN_TEST (a, b, c, testseta, testsetb, answset, count, 128, 32, 4);
}
return 0;
}
/* { dg-final { scan-assembler-times "fdiv\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
/* Switch the width-dependent test values (TESTA8, ANSW8, INFINITY) to
   their double-precision flavour for test_vdivq_f64.  */
#undef TESTA8
#undef ANSW8
#undef INFINITY
#define TESTA8 TESTA8_DBL
#define ANSW8 ANSW8_DBL
#define INFINITY DBL_INFINITY
/* Check vdivq_f64: two double lanes per vector.
   The twenty TESTA/TESTB/ANSW values are packed as ten rows of two lanes.
   Each row is loaded, divided and compared lane by lane by RUN_TEST.
   Returns 1 (from inside RUN_TEST) on the first mismatching lane and 0
   when every row matches.  */
int
test_vdivq_f64 ()
{
int count;
/* Scratch vectors for RUN_TEST: dividend/quotient, divisor, expected.  */
float64x2_t a;
float64x2_t b;
float64x2_t c;
/* Dividends.  */
float64_t testseta[10][2] = {
{ TESTA0, TESTA1 }, { TESTA2, TESTA3 },
{ TESTA4, TESTA5 }, { TESTA6, TESTA7 },
{ TESTA8, TESTA9 }, { TESTA10, TESTA11 },
{ TESTA12, TESTA13 }, { TESTA14, TESTA15 },
{ TESTA16, TESTA17 }, { TESTA18, TESTA19 }
};
/* Divisors.  */
float64_t testsetb[10][2] = {
{ TESTB0, TESTB1 }, { TESTB2, TESTB3 },
{ TESTB4, TESTB5 }, { TESTB6, TESTB7 },
{ TESTB8, TESTB9 }, { TESTB10, TESTB11 },
{ TESTB12, TESTB13 }, { TESTB14, TESTB15 },
{ TESTB16, TESTB17 }, { TESTB18, TESTB19 }
};
/* Expected quotients.  */
float64_t answset[10][2] = {
{ ANSW0, ANSW1 }, { ANSW2, ANSW3 },
{ ANSW4, ANSW5 }, { ANSW6, ANSW7 },
{ ANSW8, ANSW9 }, { ANSW10, ANSW11 },
{ ANSW12, ANSW13 }, { ANSW14, ANSW15 },
{ ANSW16, ANSW17 }, { ANSW18, ANSW19 }
};
/* One RUN_TEST expansion inside a loop: the dg-final directive after this
   function expects exactly one matching fdiv instruction.  */
for (count = 0; count < 10; count++)
{
RUN_TEST (a, b, c, testseta, testsetb, answset, count, 128, 64, 2);
}
return 0;
}
/* { dg-final { scan-assembler-times "fdiv\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
/* Test driver.  Runs the four vdiv checks in a fixed order and aborts as
   soon as one of them reports a mismatch (non-zero return); exits with 0
   when all of them pass.  */
int
main (int argc, char **argv)
{
  /* Short-circuit evaluation preserves the sequencing: a later check only
     runs when every earlier one returned 0.  */
  if (test_vdiv_f32 ()
      || test_vdiv_f64 ()
      || test_vdivq_f32 ()
      || test_vdivq_f64 ())
    abort ();

  return 0;
}
|