/*
 * vr_instrumentOp_impl.h  (excerpt; forked from edf-hpc/verrou)
 *
 * NOTE(review): the original capture of this file contained web-page
 * extraction residue (a GitHub fork banner and a 1..472 line-number
 * gutter) before the code; that non-content text has been replaced by
 * this provenance comment. No source code was removed.
 */
/* Dispatch on the VEX floating-point IROp `op` and decide how Verrou handles
 * the statement `stmt` / expression `expr` in the superblock `sb`:
 *   - add/sub/mul/div/FMA/sqrt/cast ops are replaced by instrumented calls
 *     via the vr_replace* helpers (declared elsewhere in this project);
 *   - comparison, min/max, conversion and a few NEON ops are only counted
 *     (vr_countOp) and the original statement is re-emitted unchanged;
 *   - a long list of exotic/x87/128-bit ops is reported as uncounted;
 *   - anything else falls through untouched.
 * Returns a Vr_instr_kind whose flags record whether the statement contained
 * a float comparison (containFloatCmp) and whether an FP-modifying op was
 * actually instrumented (containFloatModOp).
 *
 * NOTE(review): assumes `op`, `sb`, `stmt`, `expr` and `countOnly` are the
 * enclosing function's parameters — the function header is outside this
 * excerpt; confirm against the full file.
 * NOTE(review): add/sub/FMA cases use bcNameWithCC while mul/div/sqrt/cast
 * use bcName. This looks deliberate: "CC" presumably selects the variant
 * with cancellation checking, which is only meaningful for operations that
 * can cancel (additions/subtractions) — verify against the bcName* macro
 * definitions.
 */
Vr_instr_kind res;
/* Default: nothing interesting found; individual cases override below. */
res.containFloatCmp=False;
res.containFloatModOp=False;
switch (op) {
// Addition
// - Double precision
case Iop_AddF64: // Scalar
res.containFloatModOp=vr_replaceBinFpOpScal (sb, stmt, expr, bcNameWithCC(add64F), VR_OP_ADD, VR_PREC_DBL, VR_VEC_SCAL, countOnly);
break;
case Iop_Add64F0x2: // 128b vector, lowest-lane-only
res.containFloatModOp=vr_replaceBinFpOpLLO (sb, stmt, expr, bcNameWithCC(add64FLLO), VR_OP_ADD, VR_PREC_DBL, VR_VEC_LLO, countOnly);
break;
case Iop_Add64Fx2: // 128b vector, 2 lanes
res.containFloatModOp=vr_replaceBinFullSSE (sb, stmt, expr, bcNameWithCC(add64Fx2), VR_OP_ADD, VR_PREC_DBL, VR_VEC_FULL2, countOnly);
break;
// - Single precision
case Iop_AddF32: // Scalar
res.containFloatModOp=vr_replaceBinFpOpScal (sb, stmt, expr, bcNameWithCC(add32F), VR_OP_ADD, VR_PREC_FLT, VR_VEC_SCAL, countOnly);
break;
case Iop_Add32F0x4: // 128b vector, lowest-lane-only
res.containFloatModOp=vr_replaceBinFpOpLLO (sb, stmt, expr, bcNameWithCC(add32FLLO), VR_OP_ADD, VR_PREC_FLT, VR_VEC_LLO, countOnly);
break;
case Iop_Add32Fx4: // 128b vector, 4 lanes
res.containFloatModOp=vr_replaceBinFullSSE (sb, stmt, expr, bcNameWithCC(add32Fx4), VR_OP_ADD, VR_PREC_FLT, VR_VEC_FULL4, countOnly);
break;
case Iop_Add64Fx4: //AVX double
res.containFloatModOp=vr_replaceBinFullAVX(sb, stmt, expr, bcNameWithCC(add64Fx4), VR_OP_ADD, VR_PREC_DBL, VR_VEC_FULL4, countOnly);
break;
case Iop_Add32Fx8: //AVX Float
res.containFloatModOp=vr_replaceBinFullAVX(sb, stmt, expr, bcNameWithCC(add32Fx8), VR_OP_ADD, VR_PREC_FLT, VR_VEC_FULL8, countOnly);
break;
// Subtraction
// - Double precision
case Iop_SubF64: // Scalar
res.containFloatModOp= vr_replaceBinFpOpScal (sb, stmt, expr, bcNameWithCC(sub64F), VR_OP_SUB, VR_PREC_DBL, VR_VEC_SCAL, countOnly);
break;
case Iop_Sub64F0x2: // 128b vector, lowest-lane only
res.containFloatModOp= vr_replaceBinFpOpLLO (sb, stmt, expr, bcNameWithCC(sub64FLLO), VR_OP_SUB, VR_PREC_DBL, VR_VEC_LLO, countOnly);
break;
case Iop_Sub64Fx2:
res.containFloatModOp= vr_replaceBinFullSSE (sb, stmt, expr, bcNameWithCC(sub64Fx2), VR_OP_SUB, VR_PREC_DBL, VR_VEC_FULL2, countOnly);
break;
// - Single precision
case Iop_SubF32: // Scalar
res.containFloatModOp= vr_replaceBinFpOpScal (sb, stmt, expr, bcNameWithCC(sub32F), VR_OP_SUB, VR_PREC_FLT, VR_VEC_SCAL, countOnly);
break;
case Iop_Sub32F0x4: // 128b vector, lowest-lane-only
res.containFloatModOp= vr_replaceBinFpOpLLO (sb, stmt, expr, bcNameWithCC(sub32FLLO), VR_OP_SUB, VR_PREC_FLT, VR_VEC_LLO, countOnly);
break;
case Iop_Sub32Fx4: // 128b vector, 4 lanes
res.containFloatModOp= vr_replaceBinFullSSE (sb, stmt, expr, bcNameWithCC(sub32Fx4), VR_OP_SUB, VR_PREC_FLT, VR_VEC_FULL4, countOnly);
break;
case Iop_Sub64Fx4: //AVX double
res.containFloatModOp= vr_replaceBinFullAVX(sb, stmt, expr, bcNameWithCC(sub64Fx4), VR_OP_SUB, VR_PREC_DBL, VR_VEC_FULL4, countOnly);
break;
case Iop_Sub32Fx8: //AVX Float
res.containFloatModOp=vr_replaceBinFullAVX(sb, stmt, expr, bcNameWithCC(sub32Fx8), VR_OP_SUB, VR_PREC_FLT, VR_VEC_FULL8, countOnly);
break;
// Multiplication (plain bcName: no cancellation-check variant — see NOTE above)
// - Double precision
case Iop_MulF64: // Scalar
res.containFloatModOp=vr_replaceBinFpOpScal (sb, stmt, expr, bcName(mul64F), VR_OP_MUL, VR_PREC_DBL, VR_VEC_SCAL, countOnly);
break;
case Iop_Mul64F0x2: // 128b vector, lowest-lane-only
res.containFloatModOp=vr_replaceBinFpOpLLO (sb, stmt, expr, bcName(mul64FLLO), VR_OP_MUL, VR_PREC_DBL, VR_VEC_LLO, countOnly);
break;
case Iop_Mul64Fx2: // 128b vector, 2 lanes
res.containFloatModOp= vr_replaceBinFullSSE (sb, stmt, expr, bcName(mul64Fx2), VR_OP_MUL, VR_PREC_DBL, VR_VEC_FULL2, countOnly);
break;
// - Single precision
case Iop_MulF32: // Scalar
res.containFloatModOp= vr_replaceBinFpOpScal (sb, stmt, expr, bcName(mul32F), VR_OP_MUL, VR_PREC_FLT, VR_VEC_SCAL, countOnly);
break;
case Iop_Mul32F0x4: // 128b vector, lowest-lane-only
res.containFloatModOp= vr_replaceBinFpOpLLO (sb, stmt, expr, bcName(mul32FLLO), VR_OP_MUL, VR_PREC_FLT, VR_VEC_LLO, countOnly);
break;
case Iop_Mul32Fx4: // 128b vector, 4 lanes
res.containFloatModOp= vr_replaceBinFullSSE (sb, stmt, expr, bcName(mul32Fx4), VR_OP_MUL, VR_PREC_FLT, VR_VEC_FULL4, countOnly);
break;
case Iop_Mul64Fx4: //AVX double
res.containFloatModOp= vr_replaceBinFullAVX(sb, stmt, expr, bcName(mul64Fx4), VR_OP_MUL, VR_PREC_DBL, VR_VEC_FULL4, countOnly);
break;
case Iop_Mul32Fx8: //AVX Float
res.containFloatModOp= vr_replaceBinFullAVX(sb, stmt, expr, bcName(mul32Fx8), VR_OP_MUL, VR_PREC_FLT, VR_VEC_FULL8, countOnly);
break;
// Division
case Iop_DivF32:
res.containFloatModOp= vr_replaceBinFpOpScal (sb, stmt, expr, bcName(div32F), VR_OP_DIV, VR_PREC_FLT, VR_VEC_SCAL, countOnly);
break;
case Iop_Div32F0x4: // 128b vector, lowest-lane-only
res.containFloatModOp=vr_replaceBinFpOpLLO (sb, stmt, expr, bcName(div32FLLO), VR_OP_DIV, VR_PREC_FLT, VR_VEC_LLO, countOnly);
break;
case Iop_Div32Fx4: // 128b vector, 4 lanes
res.containFloatModOp= vr_replaceBinFullSSE (sb, stmt, expr, bcName(div32Fx4), VR_OP_DIV, VR_PREC_FLT, VR_VEC_FULL4, countOnly);
break;
case Iop_DivF64: // Scalar
res.containFloatModOp=vr_replaceBinFpOpScal (sb, stmt, expr, bcName(div64F), VR_OP_DIV, VR_PREC_DBL, VR_VEC_SCAL, countOnly);
break;
case Iop_Div64F0x2: // 128b vector, lowest-lane-only
res.containFloatModOp= vr_replaceBinFpOpLLO (sb, stmt, expr, bcName(div64FLLO), VR_OP_DIV, VR_PREC_DBL, VR_VEC_LLO, countOnly);
break;
case Iop_Div64Fx2: // 128b vector, 2 lanes
res.containFloatModOp= vr_replaceBinFullSSE(sb, stmt, expr, bcName(div64Fx2), VR_OP_DIV, VR_PREC_DBL, VR_VEC_FULL2, countOnly);
break;
case Iop_Div64Fx4: //AVX double
res.containFloatModOp= vr_replaceBinFullAVX(sb, stmt, expr, bcName(div64Fx4), VR_OP_DIV, VR_PREC_DBL, VR_VEC_FULL4, countOnly);
break;
case Iop_Div32Fx8: //AVX Float
res.containFloatModOp= vr_replaceBinFullAVX(sb, stmt, expr, bcName(div32Fx8), VR_OP_DIV, VR_PREC_FLT, VR_VEC_FULL8, countOnly);
break;
// Fused multiply-add / multiply-subtract.
// With IGNOREFMA defined the op is only counted (vector width unknown at
// this point, hence VR_VEC_UNK) and passed through unmodified.
case Iop_MAddF32:
#ifndef IGNOREFMA
res.containFloatModOp=vr_replaceFMA (sb, stmt, expr, bcNameWithCC(madd32F), VR_OP_MADD, VR_PREC_FLT, countOnly);
break;
#else
vr_countOp (sb, VR_OP_MADD, VR_PREC_FLT, VR_VEC_UNK,False);
res.containFloatModOp=False;
addStmtToIRSB (sb, stmt);
break;
#endif
case Iop_MSubF32:
#ifndef IGNOREFMA
res.containFloatModOp=vr_replaceFMA (sb, stmt, expr, bcNameWithCC(msub32F), VR_OP_MSUB, VR_PREC_FLT, countOnly);
break;
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_MSUB, VR_PREC_FLT, VR_VEC_UNK,False);
addStmtToIRSB (sb, stmt);
break;
#endif
case Iop_MAddF64:
#ifndef IGNOREFMA
res.containFloatModOp=vr_replaceFMA (sb, stmt, expr, bcNameWithCC(madd64F), VR_OP_MADD, VR_PREC_DBL, countOnly);
break;
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_MADD, VR_PREC_DBL, VR_VEC_UNK,False);
addStmtToIRSB (sb, stmt);
break;
#endif
case Iop_MSubF64:
#ifndef IGNOREFMA
res.containFloatModOp= vr_replaceFMA (sb, stmt, expr, bcNameWithCC(msub64F), VR_OP_MSUB, VR_PREC_DBL, countOnly);
break;
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_MSUB, VR_PREC_DBL, VR_VEC_UNK,False);
addStmtToIRSB (sb, stmt);
break;
#endif
// Other FP operations: counted only, original statement re-emitted.
case Iop_Add32Fx2: // 64b NEON-style 2-lane float add: not instrumented
vr_countOp (sb, VR_OP_ADD, VR_PREC_FLT, VR_VEC_FULL2,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Sub32Fx2: // 64b NEON-style 2-lane float sub: not instrumented
vr_countOp (sb, VR_OP_SUB, VR_PREC_FLT, VR_VEC_FULL2,False);
addStmtToIRSB (sb, stmt);
break;
// Scalar comparisons: flag containFloatCmp so the caller knows a float
// comparison occurred, but do not replace the operation.
case Iop_CmpF64:
res.containFloatCmp=True;
vr_countOp (sb, VR_OP_CMP, VR_PREC_DBL, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_CmpF32:
res.containFloatCmp=True;
vr_countOp (sb, VR_OP_CMP, VR_PREC_FLT, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
break;
// Conversions: widening F32->F64 is exact so only counted; narrowing
// F64->F32 rounds and is therefore instrumented unless IGNORECAST is set.
case Iop_F32toF64: /* F32 -> F64 */
vr_countOp (sb, VR_OP_CONV, VR_PREC_FLT_TO_DBL, VR_VEC_UNK,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_F64toF32:
#ifndef IGNORECAST
res.containFloatModOp=vr_replaceCast (sb, stmt, expr, bcName(cast64FTo32F), VR_OP_CONV, VR_PREC_DBL_TO_FLT, countOnly);
break;
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_CONV, VR_PREC_DBL_TO_FLT, VR_VEC_UNK,False);
addStmtToIRSB (sb, stmt);
#endif
break;
// Float-to-integer conversions: counted only.
case Iop_F64toI64S: /* IRRoundingMode(I32) x F64 -> signed I64 */
vr_countOp (sb, VR_OP_CONV, VR_PREC_DBL_TO_INT, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_F64toI64U: /* IRRoundingMode(I32) x F64 -> unsigned I64 */
vr_countOp (sb, VR_OP_CONV, VR_PREC_DBL_TO_INT, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_F64toI32S: /* IRRoundingMode(I32) x F64 -> signed I32 */
vr_countOp (sb, VR_OP_CONV, VR_PREC_DBL_TO_SHT, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_F64toI32U: /* IRRoundingMode(I32) x F64 -> unsigned I32 */
vr_countOp (sb, VR_OP_CONV, VR_PREC_DBL_TO_SHT, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
break;
/******/
// Min/max: exact operations (no rounding), counted only.
case Iop_Max32Fx4:
vr_countOp (sb, VR_OP_MAX, VR_PREC_FLT, VR_VEC_FULL4,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Max32F0x4:
vr_countOp (sb, VR_OP_MAX, VR_PREC_FLT, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Max64Fx2:
vr_countOp (sb, VR_OP_MAX, VR_PREC_DBL, VR_VEC_FULL2,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Max64F0x2:
vr_countOp (sb, VR_OP_MAX, VR_PREC_DBL, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Min32Fx4:
vr_countOp (sb, VR_OP_MIN, VR_PREC_FLT, VR_VEC_FULL4,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Min32F0x4:
vr_countOp (sb, VR_OP_MIN, VR_PREC_FLT, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Min64Fx2:
vr_countOp (sb, VR_OP_MIN, VR_PREC_DBL, VR_VEC_FULL2,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_Min64F0x2:
vr_countOp (sb, VR_OP_MIN, VR_PREC_DBL, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
break;
// Vector comparisons: flag + count, statement re-emitted unchanged.
case Iop_CmpEQ64Fx2: case Iop_CmpLT64Fx2:
case Iop_CmpLE64Fx2: case Iop_CmpUN64Fx2:
res.containFloatCmp=True;
vr_countOp (sb, VR_OP_CMP, VR_PREC_DBL, VR_VEC_FULL2,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_CmpEQ64F0x2: case Iop_CmpLT64F0x2:
case Iop_CmpLE64F0x2: case Iop_CmpUN64F0x2:
res.containFloatCmp=True;
vr_countOp (sb, VR_OP_CMP, VR_PREC_DBL, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_CmpEQ32Fx4: case Iop_CmpLT32Fx4:
case Iop_CmpLE32Fx4: case Iop_CmpUN32Fx4:
case Iop_CmpGT32Fx4: case Iop_CmpGE32Fx4:
res.containFloatCmp=True;
vr_countOp (sb, VR_OP_CMP, VR_PREC_FLT, VR_VEC_FULL4,False);
addStmtToIRSB (sb, stmt);
break;
case Iop_CmpEQ32F0x4: case Iop_CmpLT32F0x4:
case Iop_CmpLE32F0x4: case Iop_CmpUN32F0x4:
res.containFloatCmp=True;
vr_countOp (sb, VR_OP_CMP, VR_PREC_FLT, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
break;
// Square root (unary): instrumented via the *_unary replace helpers unless
// IGNORESQRT is defined, in which case it is counted and passed through.
case Iop_SqrtF64:
#ifndef IGNORESQRT
res.containFloatModOp=vr_replaceBinFpOpScal_unary (sb, stmt, expr, bcName(sqrt64F), VR_OP_SQRT, VR_PREC_DBL, VR_VEC_SCAL, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_DBL, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_Sqrt64F0x2:
#ifndef IGNORESQRT
res.containFloatModOp= vr_replaceBinFpOpLLO_unary (sb, stmt, expr, bcName(sqrt64FLLO), VR_OP_SQRT, VR_PREC_DBL, VR_VEC_LLO, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_DBL, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_Sqrt64Fx2:
#ifndef IGNORESQRT
res.containFloatModOp= vr_replaceBinFullSSE_unary(sb, stmt, expr, bcName(sqrt64Fx2), VR_OP_SQRT, VR_PREC_DBL, VR_VEC_FULL2, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_DBL, VR_VEC_FULL2,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_Sqrt64Fx4:
#ifndef IGNORESQRT
res.containFloatModOp= vr_replaceBinFullAVX_unary(sb, stmt, expr, bcName(sqrt64Fx4), VR_OP_SQRT, VR_PREC_DBL, VR_VEC_FULL4, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_DBL, VR_VEC_FULL4,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_SqrtF32:
#ifndef IGNORESQRT
res.containFloatModOp= vr_replaceBinFpOpScal_unary (sb, stmt, expr, bcName(sqrt32F), VR_OP_SQRT, VR_PREC_FLT, VR_VEC_SCAL, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_FLT, VR_VEC_SCAL,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_Sqrt32F0x4:
#ifndef IGNORESQRT
res.containFloatModOp=vr_replaceBinFpOpLLO_unary (sb, stmt, expr, bcName(sqrt32FLLO), VR_OP_SQRT, VR_PREC_FLT, VR_VEC_LLO, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_FLT, VR_VEC_LLO,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_Sqrt32Fx4:
#ifndef IGNORESQRT
res.containFloatModOp= vr_replaceBinFullSSE_unary (sb, stmt, expr, bcName(sqrt32Fx4), VR_OP_SQRT, VR_PREC_FLT, VR_VEC_FULL4, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_FLT, VR_VEC_FULL4,False);
addStmtToIRSB (sb, stmt);
#endif
break;
case Iop_Sqrt32Fx8:
#ifndef IGNORESQRT
res.containFloatModOp= vr_replaceBinFullAVX_unary(sb, stmt, expr, bcName(sqrt32Fx8), VR_OP_SQRT, VR_PREC_FLT, VR_VEC_FULL8, countOnly);
#else
res.containFloatModOp=False;
vr_countOp (sb, VR_OP_SQRT, VR_PREC_FLT, VR_VEC_FULL8,False);
addStmtToIRSB (sb, stmt);
#endif
break;
// Exact / sign-only / bit-reinterpretation ops: no rounding involved,
// so they are neither counted nor instrumented.
case Iop_ReinterpF64asI64:
case Iop_ReinterpI64asF64:
case Iop_ReinterpF32asI32:
case Iop_ReinterpI32asF32:
case Iop_NegF64:
case Iop_AbsF64:
case Iop_NegF32:
case Iop_AbsF32:
case Iop_Abs64Fx2:
case Iop_Neg64Fx2:
//ignored : not counted and not instrumented
addStmtToIRSB (sb, stmt);
break;
// Known-but-unsupported FP ops (x87 transcendentals, F128, half precision,
// estimate/step instructions, ...): report once as VR_ERROR_UNCOUNTED
// (unless only counting) and pass the statement through unchanged.
case Iop_SqrtF128:
case Iop_Sqrt16Fx8:
//operation with 64bit register with 32bit rounding
case Iop_AddF64r32:
case Iop_SubF64r32:
case Iop_MulF64r32:
case Iop_DivF64r32:
case Iop_MAddF64r32:
case Iop_MSubF64r32:
//operation with 128bit
case Iop_AddF128:
case Iop_SubF128:
case Iop_MulF128:
case Iop_DivF128:
case Iop_AtanF64: /* FPATAN, arctan(arg1/arg2) */
case Iop_Yl2xF64: /* FYL2X, arg1 * log2(arg2) */
case Iop_Yl2xp1F64: /* FYL2XP1, arg1 * log2(arg2+1.0) */
case Iop_PRemF64: /* FPREM, non-IEEE remainder(arg1/arg2) */
case Iop_PRemC3210F64: /* C3210 flags resulting from FPREM: :: I32 */
case Iop_PRem1F64: /* FPREM1, IEEE remainder(arg1/arg2) */
case Iop_PRem1C3210F64: /* C3210 flags resulting from FPREM1, :: I32 */
case Iop_ScaleF64: /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */
case Iop_SinF64: /* FSIN */
case Iop_CosF64: /* FCOS */
case Iop_TanF64: /* FTAN */
case Iop_2xm1F64: /* (2^arg - 1.0) */
case Iop_RSqrtEst5GoodF64: /* reciprocal square root estimate, 5 good bits */
case Iop_Log2_64Fx2:
case Iop_Scale2_64Fx2:
case Iop_RecipEst64Fx2: // unary
case Iop_RecipStep64Fx2: // binary
case Iop_RSqrtEst64Fx2: // unary
case Iop_RSqrtStep64Fx2: // binary
case Iop_RecipStep32Fx4:
case Iop_RSqrtEst32Fx4:
case Iop_RSqrtStep32Fx4:
case Iop_RecipEst32F0x4:
case Iop_RSqrtEst32F0x4:
case Iop_Scale2_32Fx4:
case Iop_Log2_32Fx4:
case Iop_Exp2_32Fx4:
/*AVX*/
case Iop_RSqrtEst32Fx8:
case Iop_RecipEst32Fx8:
case Iop_RoundF64toF64_NEAREST: /* frin */
case Iop_RoundF64toF64_NegINF: /* frim */
case Iop_RoundF64toF64_PosINF: /* frip */
case Iop_RoundF64toF64_ZERO: /* friz */
case Iop_F32toF16x4:
case Iop_F16toF32x4:
case Iop_F16toF64x2:
case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
case Iop_F64toI16S: /* IRRoundingMode(I32) x F64 -> signed I16 */
case Iop_CmpF128:
case Iop_PwMax32Fx4: case Iop_PwMin32Fx4:
if(!countOnly){
vr_maybe_record_ErrorOp (VR_ERROR_UNCOUNTED, op);
}
addStmtToIRSB (sb, stmt);
break;
// Non-FP (or unrecognized) ops: pass through silently.
default:
addStmtToIRSB (sb, stmt);
break;
}
return res;