1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
/* { dg-do run } */
/* { dg-require-effective-target sse4a } */
/* { dg-options "-O2 -msse4a" } */
#include "sse4a-check.h"
#include <ammintrin.h>
typedef union
{
long long i[2];
__m128i vec;
} LI;
static long long
sse4a_test_insert (long long in1, long long in2)
{
__m128i v1,v2;
long long index_length, pad;
LI v_out;
index_length = 0x0000000000000810LL;
pad = 0x0;
v1 = _mm_set_epi64x (pad, in1);
v2 = _mm_set_epi64x (index_length, in2);
v_out.vec = _mm_insert_si64 (v1, v2);
return (v_out.i[0]);
}
static long long
sse4a_test_inserti (long long in1, long long in2)
{
__m128i v1,v2;
long long pad = 0x0;
LI v_out;
v1 = _mm_set_epi64x (pad, in1);
v2 = _mm_set_epi64x (pad, in2);
v_out.vec = _mm_inserti_si64 (v1, v2, (unsigned int) 0x10, (unsigned int) 0x08);
return (v_out.i[0]);
}
static chk (long long i1, long long i2)
{
int n_fails =0;
if (i1 != i2)
n_fails +=1;
return n_fails;
}
long long vals_in1[5] =
{
0x1234567887654321LL,
0x1456782093002490LL,
0x2340909123990390LL,
0x9595959599595999LL,
0x9099038798000029LL
};
long long vals_in2[5] =
{
0x9ABCDEF00FEDCBA9LL,
0x234567097289672ALL,
0x45476453097BD342LL,
0x23569012AE586FF0LL,
0x432567ABCDEF765DLL
};
long long vals_out[5] =
{
0x1234567887CBA921LL,
0x1456782093672A90LL,
0x2340909123D34290LL,
0x95959595996FF099LL,
0x9099038798765D29LL
};
static void
sse4a_test (void)
{
int i;
int fail = 0;
long long out;
for (i = 0; i < 5; i += 1)
{
out = sse4a_test_insert (vals_in1[i], vals_in2[i]);
fail += chk(out, vals_out[i]);
out = sse4a_test_inserti (vals_in1[i], vals_in2[i]);
fail += chk(out, vals_out[i]);
}
if (fail != 0)
abort ();
}
|