/* Run-time test for the SSE3 movddup intrinsics (_mm_loaddup_pd and _mm_movedup_pd).  */
/* { dg-do run } */
/* { dg-require-effective-target sse3 } */
/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
#ifndef CHECK_H
#define CHECK_H "sse3-check.h"
#endif
#ifndef TEST
#define TEST sse3_test
#endif
#include CHECK_H
#include <pmmintrin.h>
/* Duplicate the double at I1[0] into both lanes of a 128-bit vector using
   _mm_loaddup_pd (load one double from memory and broadcast it), then
   write both lanes to R[0] and R[1] with an unaligned store.  */
static void
sse3_test_movddup_mem (double *i1, double *r)
{
__m128d t1 = _mm_loaddup_pd (i1);
_mm_storeu_pd (r, t1);
}
/* Two-lane multiplier of 1.0 used by sse3_test_movddup_reg.  */
static double cnst1 [2] = {1.0, 1.0};
/* Load I1[0..1] with an unaligned load, multiply lane-wise by cnst1, then
   duplicate the low lane of the product into both lanes with
   _mm_movedup_pd and store the result to R[0..1] (unaligned store).
   NOTE(review): the multiply by 1.0 presumably exists so that the operand
   of _mm_movedup_pd is a computed value rather than a plain load --
   confirm against the test's intent.  */
static void
sse3_test_movddup_reg (double *i1, double *r)
{
__m128d t1 = _mm_loadu_pd (i1);
__m128d t2 = _mm_loadu_pd (&cnst1[0]);
t1 = _mm_mul_pd (t1, t2);
t2 = _mm_movedup_pd (t1);
_mm_storeu_pd (r, t2);
}
/* Load I1[0..1] with an unaligned load (_mm_loadu_pd), duplicate the low
   lane into both lanes with _mm_movedup_pd, and store the result to
   R[0..1] with an unaligned store.
   NOTE(review): "subsume" presumably refers to the load being foldable
   into the duplicate operation -- confirm.  */
static void
sse3_test_movddup_reg_subsume_unaligned (double *i1, double *r)
{
__m128d t1 = _mm_loadu_pd (i1);
__m128d t2 = _mm_movedup_pd (t1);
_mm_storeu_pd (r, t2);
}
/* Load the single double at I1[0] into the low lane with _mm_load_sd,
   duplicate the low lane into both lanes with _mm_movedup_pd, and store
   the result to R[0..1] with an unaligned store.  Only I1[0] is read.  */
static void
sse3_test_movddup_reg_subsume_ldsd (double *i1, double *r)
{
__m128d t1 = _mm_load_sd (i1);
__m128d t2 = _mm_movedup_pd (t1);
_mm_storeu_pd (r, t2);
}
/* Load I1[0..1] with an aligned load (_mm_load_pd), duplicate the low lane
   into both lanes with _mm_movedup_pd, and store the result to R[0..1]
   with an unaligned store.  Because _mm_load_pd is the aligned-load
   intrinsic, I1 must be 16-byte aligned.  */
static void
sse3_test_movddup_reg_subsume (double *i1, double *r)
{
__m128d t1 = _mm_load_pd (i1);
__m128d t2 = _mm_movedup_pd (t1);
_mm_storeu_pd (r, t2);
}
/* Compare the first two elements of V1 against the first two elements of
   V2 and return the number of positions that differ (0, 1 or 2).  */
static int
chk_pd (double *v1, double *v2)
{
  int mismatches = 0;
  int lane = 0;

  while (lane < 2)
    {
      /* A != comparison yields 1 on mismatch and 0 otherwise.  */
      mismatches += (v1[lane] != v2[lane]);
      lane++;
    }

  return mismatches;
}
/* Input buffer passed to each wrapper.  It is 16-byte aligned, which the
   aligned load in sse3_test_movddup_reg_subsume relies on.  The test loop
   writes only element 0.  */
static double p1[2] __attribute__ ((aligned(16)));
/* Output buffer that each wrapper stores its two-lane result into.  */
static double p2[2];
/* Expected result: the test loop sets both elements to the input value.  */
static double ck[2];
/* Table of 80 input values; the test loop feeds them to the wrappers one
   at a time.  */
static double vals[80] =
{
100.0, 200.0, 300.0, 400.0, 5.0, -1.0, .345, -21.5,
1100.0, 0.235, 321.3, 53.40, 0.3, 10.0, 42.0, 32.52,
32.6, 123.3, 1.234, 2.156, 0.1, 3.25, 4.75, 32.44,
12.16, 52.34, 64.12, 71.13, -.1, 2.30, 5.12, 3.785,
541.3, 321.4, 231.4, 531.4, 71., 321., 231., -531.,
23.45, 23.45, 23.45, 23.45, 23.45, 23.45, 23.45, 23.45,
23.45, -1.43, -6.74, 6.345, -20.1, -20.1, -40.1, -40.1,
1.234, 2.345, 3.456, 4.567, 5.678, 6.789, 7.891, 8.912,
-9.32, -8.41, -7.50, -6.59, -5.68, -4.77, -3.86, -2.95,
9.32, 8.41, 7.50, 6.59, -5.68, -4.77, -3.86, -2.95
};
/* Test driver.  For each of the 80 entries of vals: place it in p1[0],
   set both elements of ck to that value, run every movddup wrapper on
   p1, and add up how many elements of p2 differ from ck.  Calls
   abort () if any mismatch was seen.  */
static void
TEST (void)
{
  int n_bad = 0;
  int idx;

  for (idx = 0; idx != 80; ++idx)
    {
      /* Every wrapper is expected to produce the input in both lanes.  */
      p1[0] = vals[idx];
      ck[1] = ck[0] = p1[0];

      /* Duplicate directly from memory.  */
      sse3_test_movddup_mem (p1, p2);
      n_bad += chk_pd (ck, p2);

      /* Duplicate a value that went through a multiply first.  */
      sse3_test_movddup_reg (p1, p2);
      n_bad += chk_pd (ck, p2);

      /* Duplicate after an aligned full-vector load.  */
      sse3_test_movddup_reg_subsume (p1, p2);
      n_bad += chk_pd (ck, p2);

      /* Duplicate after an unaligned full-vector load.  */
      sse3_test_movddup_reg_subsume_unaligned (p1, p2);
      n_bad += chk_pd (ck, p2);

      /* Duplicate after a scalar load into the low lane.  */
      sse3_test_movddup_reg_subsume_ldsd (p1, p2);
      n_bad += chk_pd (ck, p2);
    }

  if (n_bad)
    abort ();
}
|