1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/* { dg-do compile } */
#include <altivec.h>
/* Transpose, in place, the 4x4 float matrix stored as the four row
   vectors matrix[0] .. matrix[3].  Every row is read into a temporary
   before any row is overwritten.  */
inline void
transpose4x4(vector float *matrix)
{
  /* Pass 1: interleave row 0 with row 2 and row 1 with row 3.  The
     "hi" temporaries come from vec_mergeh, the "lo" ones from
     vec_mergel.  */
  vector float rows02_hi = vec_mergeh(matrix[0], matrix[2]);
  vector float rows02_lo = vec_mergel(matrix[0], matrix[2]);
  vector float rows13_hi = vec_mergeh(matrix[1], matrix[3]);
  vector float rows13_lo = vec_mergel(matrix[1], matrix[3]);

  /* Pass 2: interleave the pass-1 results with each other; each merge
     yields one finished row of the transposed matrix.  */
  matrix[0] = vec_mergeh(rows02_hi, rows13_hi);
  matrix[1] = vec_mergel(rows02_hi, rows13_hi);
  matrix[2] = vec_mergeh(rows02_lo, rows13_lo);
  matrix[3] = vec_mergel(rows02_lo, rows13_lo);
}
/* Stage 1 of the N=64 transform described in the comment below.

   x0 is read at vector indices 0..31: indices 0..15 are loaded as the
   real parts and indices 16..31 as the imaginary parts.  x1 is written
   at vector indices 0..31 using the same split (real results at 0..15,
   imaginary results at 16..31).  Within one loop iteration every load
   from x0 happens before any store to x1.  */
void
vec_ifft64(vector float *x0, vector float *x1)
{
  vector float in_re[4], in_im[4];     /* c0..c3 of the pseudo-code */
  vector float mid_re[4], mid_im[4];   /* d0..d3 of the pseudo-code */
  vector float real[4], imag[4];       /* results for one group of x1 */
  int blk, k;

  /*
   * N=64
   *
   * Stage 1: t=1 => k = 0, j = 0..15
   * ================================
   * for j = 0:15
   *   c0 = x0(j+0*16);
   *   c1 = x0(j+1*16);
   *   c2 = x0(j+2*16);
   *   c3 = x0(j+3*16);
   *
   *   d0 = c0 + c2;
   *   d1 = c0 - c2;
   *   d2 = c1 + c3;
   *   d3 = i*(c1 - c3);
   *
   *   x1(4j+0) = d0 + d2;
   *   x1(4j+1) = d1 + d3;
   *   x1(4j+2) = d0 - d2;
   *   x1(4j+3) = d1 - d3;
   * end
   ******************************************************/
  for (blk = 0; blk < 4; blk++)
    {
      /* Gather c0..c3: four consecutive values of j per vector, with
         the imaginary parts 16 vectors after the real parts.  */
      for (k = 0; k < 4; k++)
        {
          in_re[k] = x0[blk + 4 * k];
          in_im[k] = x0[blk + 4 * k + 16];
        }

      /* d0..d2 are plain sums/differences.  d3 = i*(c1 - c3), so its
         real part is (c3i - c1i) and its imaginary part (c1r - c3r).  */
      mid_re[0] = vec_add(in_re[0], in_re[2]);
      mid_re[1] = vec_sub(in_re[0], in_re[2]);
      mid_re[2] = vec_add(in_re[1], in_re[3]);
      mid_re[3] = vec_sub(in_im[3], in_im[1]);
      mid_im[0] = vec_add(in_im[0], in_im[2]);
      mid_im[1] = vec_sub(in_im[0], in_im[2]);
      mid_im[2] = vec_add(in_im[1], in_im[3]);
      mid_im[3] = vec_sub(in_re[1], in_re[3]);

      /* Calculate real{x1}; the transpose regroups the four results
         of each j into one vector so they can be stored contiguously.  */
      real[0] = vec_add(mid_re[0], mid_re[2]);
      real[1] = vec_add(mid_re[1], mid_re[3]);
      real[2] = vec_sub(mid_re[0], mid_re[2]);
      real[3] = vec_sub(mid_re[1], mid_re[3]);
      transpose4x4(real);

      /* Calculate imag{x1} in the same way.  */
      imag[0] = vec_add(mid_im[0], mid_im[2]);
      imag[1] = vec_add(mid_im[1], mid_im[3]);
      imag[2] = vec_sub(mid_im[0], mid_im[2]);
      imag[3] = vec_sub(mid_im[1], mid_im[3]);
      transpose4x4(imag);

      /* Scatter: real results first, then the imaginary results
         16 vectors further on.  */
      for (k = 0; k < 4; k++)
        x1[4 * blk + k] = real[k];
      for (k = 0; k < 4; k++)
        x1[4 * blk + 16 + k] = imag[k];
    }
}
|