summaryrefslogtreecommitdiff
path: root/gcc/testsuite/gcc.target/i386/pr23570.c
diff options
context:
space:
mode:
author: upstream source tree <ports@midipix.org> 2015-03-15 20:14:05 -0400
committer: upstream source tree <ports@midipix.org> 2015-03-15 20:14:05 -0400
commit: 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree: 976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/testsuite/gcc.target/i386/pr23570.c
download: cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;upstream
verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository.
Diffstat (limited to 'gcc/testsuite/gcc.target/i386/pr23570.c')
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr23570.c 92
1 files changed, 92 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr23570.c b/gcc/testsuite/gcc.target/i386/pr23570.c
new file mode 100644
index 000000000..1542663fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr23570.c
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* GCC vector typedefs, each declared with __vector_size__ (16): __v4sf and __m128 have float elements, __v2di has long long elements. */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+typedef float __m128 __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+/* Casts both operands to __v4sf, passes them to __builtin_ia32_cmpeqps, and returns the builtin's result cast to __m128. */
+static __inline __m128
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
+}
+/* Returns an __m128 built from a __v4sf initializer list whose elements are __Z, __Y, __X, __W in that order. */
+static __inline __m128
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+ return __extension__ (__m128)(__v4sf){__Z, __Y, __X, __W };
+}
+/* Casts both __m128 operands to __v2di, passes them to __builtin_ia32_pand128, and returns the result cast back to __m128. */
+static __inline __m128
+_mm_and_si128 (__m128 __A, __m128 __B)
+{
+ return (__m128)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+}
+/* Casts both __m128 operands to __v2di, passes them to __builtin_ia32_por128, and returns the result cast back to __m128. */
+static __inline __m128
+_mm_or_si128 (__m128 __A, __m128 __B)
+{
+ return (__m128)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+}
+/* um128: union declared with aligned (16) that overlays one __m128 (xmmi) with an array of four ints (si). */
+typedef union
+{
+ __m128 xmmi;
+ int si[4];
+}
+__attribute__ ((aligned (16))) um128;
+
+um128 u; /* File-scope object; sse_max_abs_indexf declares its own local `u`, which hides this one inside that function. */
+/* ANDs a vector made from 0.0, v[step], v[step2], v[step3] with u.xmmi, optionally runs the compare/and/or helpers, and returns ui.si[n]. */
+static inline int
+sse_max_abs_indexf (float *v, int step, int n)
+{
+ __m128 m1, mm; /* m1 is never assigned in this function. */
+ __m128 mim, mi, msk; /* mim and mi have no initializer; mim is assigned only inside the `if (n4)` branch. */
+ um128 u, ui; /* This local `u` hides the file-scope `u` and has no initializer. */
+ int n4, step2, step3; /* None of these is assigned before being read below. */
+ mm = __builtin_ia32_andps ((__m128) (__v4sf)
+ { 0.0, v[step], v[step2], v[step3] }
+ , u.xmmi);
+ if (n4) /* NOTE(review): n4 is read uninitialized; presumably deliberate in this reduced dg-do compile testcase -- confirm before changing. */
+ {
+ int i;
+ for (i = 0; i < n4; ++i); /* The trailing `;` is the whole loop body; the next two statements execute once, after the loop. */
+ msk = (__m128) _mm_cmpeq_ps (m1, mm);
+ mim = _mm_or_si128 (_mm_and_si128 (msk, mi), mim);
+ }
+ ui.xmmi = (__m128) mim;
+ return ui.si[n]; /* si has four elements; n is used as the index without a range check. */
+}
+/* Stores *r2 into each of the first (n / 4) * 4 elements of r1; r2 is never advanced or written, so no exchange takes place as written. */
+static void
+sse_swap_rowf (float *r1, float *r2, int n)
+{
+ int n4 = (n / 4) * 4; /* n truncated toward zero to a multiple of 4. */
+ float *r14end = r1 + n4;
+ while (r1 < r14end)
+ {
+ *r1 = *r2; /* Reads through the same r2 address on every iteration. */
+ r1++;
+ }
+}
+/* For i in [0, n - 1), with pm advanced by nw per step: gets vi from sse_max_abs_indexf, calls the two swap routines when vi != 0, then sse_add_rowf for each j in [i + 1, n). */
+void
+ludcompf (float *m, int nw, int *prow, int n)
+{
+ int i, s = 0; /* s is never used after this initialization. */
+ float *pm;
+ for (i = 0, pm = m; i < n - 1; ++i, pm += nw)
+ {
+ int vi = sse_max_abs_indexf (pm + i, nw, n - i);
+ float *pt;
+ int j;
+ if (vi != 0)
+ {
+ sse_swap_rowf (pm, pm + vi * nw, nw);
+ swap_index (prow, i, i + vi); /* swap_index is not declared anywhere in this diff. */
+ }
+ for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw)
+ sse_add_rowf (pt + i + 1, pm + i + 1, -1.0, n - i - 1); /* sse_add_rowf is not declared anywhere in this diff. */
+ }
+}