1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
/* PR rtl-optimization/28982. Function foo() does the equivalent of:
float tmp_results[NVARS];
for (int i = 0; i < NVARS; i++)
{
int inc = incs[i];
float *ptr = ptrs[i], result = 0;
for (int j = 0; j < n; j++)
result += *ptr, ptr += inc;
tmp_results[i] = result;
}
memcpy (results, tmp_results, sizeof (results));
but without the outermost loop. The idea is to create high register
pressure and ensure that some INC and PTR variables are spilled.
On ARM targets, sequences like "result += *ptr, ptr += inc" can
usually be implemented using (mem (post_modify ...)), and we do
indeed create such MEMs before reload for this testcase. However,
(post_modify ...) is not a valid address for coprocessor loads, so
for -mfloat-abi=softfp, reload reloads the POST_MODIFY into a base
register. GCC did not deal correctly with cases where the base and
index of the POST_MODIFY are themselves reloaded. */
/* Number of loop iterations foo() is asked to run; main() passes it as N
   and uses it to size the input data and compute the expected sums.  */
#define NITER 4
/* Number of independent pointer/stride/accumulator triples.  This must
   stay in step with MULTI below, which spells out indices 0..19 by hand.  */
#define NVARS 20
/* Expand X once for each index 0 .. 19, producing a comma-separated list.
   Depending on X, the result is used either as a declarator list or as a
   single comma expression.  */
#define MULTI(X) \
X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
/* Declarator for the stride variable incN, initialized from incs[N].
   Meant to follow an "int" type specifier.  */
#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
/* Declarators for the pointer ptrN (initialized from ptrs[N]) and the
   accumulator resultN (initialized to 0).  Meant to follow a "float"
   type specifier, so ptrN is a "float *" and resultN is a "float".  */
#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 0
/* One iteration for stream N: add the element ptrN points to into
   resultN, then advance ptrN by incN elements.  */
#define LOOP(INDEX) result##INDEX += *ptr##INDEX, ptr##INDEX += inc##INDEX
/* Store the accumulator for stream N into the global results array.  */
#define COPYOUT(INDEX) results[INDEX] = result##INDEX
/* Start pointer of each stream; read by foo().  */
float *ptrs[NVARS];
/* Per-stream sums; written by foo().  */
float results[NVARS];
/* Per-stream stride, in elements; read by foo().  */
int incs[NVARS];
/* For each index I in 0 .. 19, sum N floats starting at ptrs[I] and
   stepping by incs[I] elements, then store the sum in results[I].

   All twenty streams are handled by separate local variables inside a
   single loop rather than by an outer loop over I; as the comment at the
   top of the file explains, this is deliberate, to create high register
   pressure.  The noinline attribute asks the compiler not to inline the
   function into its caller.  */
void __attribute__((noinline))
foo (int n)
{
/* Declares inc0 .. inc19, each loaded from the matching incs[] entry.  */
int MULTI (DECLAREI);
/* Declares ptr0 .. ptr19 (loaded from ptrs[]) and result0 .. result19
   (all starting at 0).  */
float MULTI (DECLAREF);
/* The loop body is a single comma expression that accumulates and then
   advances every stream once.  Note that each pointer is advanced after
   its last load as well.  */
while (n--)
MULTI (LOOP);
/* Copy every accumulator out to the global results array.  */
MULTI (COPYOUT);
}
/* Backing store for every stream read by foo().  Stream I starts at
   input[I] and advances by I elements after each load, so after NITER
   iterations foo() has formed the pointer input + I * (NITER + 1).
   That final pointer is never dereferenced, but merely computing it is
   undefined behavior if it lies beyond one-past-the-end of the array.
   With only NITER * NVARS elements the highest-numbered streams would
   overshoot, so the array is sized to (NITER + 1) * NVARS, which bounds
   I * (NITER + 1) for every I < NVARS.  */
float input[(NITER + 1) * NVARS];

/* Test driver: set up NVARS streams over INPUT, run foo() for NITER
   iterations and check each sum.  Returns 0 if every result matches the
   expected value and 1 otherwise.  */
int
main (void)
{
  int i;

  /* Stream I starts at element I and has stride I.  */
  for (i = 0; i < NVARS; i++)
    {
      ptrs[i] = input + i;
      incs[i] = i;
    }

  /* Only the first NITER * NVARS elements are ever loaded by foo(); the
     remaining elements exist solely to keep pointer arithmetic in bounds
     and keep their static zero initialization.  */
  for (i = 0; i < NITER * NVARS; i++)
    input[i] = i;

  foo (NITER);

  /* Stream I loads input[I], input[2I], ..., input[NITER * I], whose
     values are I, 2I, ..., NITER * I, so the expected sum is
     I * NITER * (NITER + 1) / 2.  */
  for (i = 0; i < NVARS; i++)
    {
      if (results[i] != i * NITER * (NITER + 1) / 2)
        return 1;
    }

  return 0;
}
|