diff options
author | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
---|---|---|
committer | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
commit | 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch) | |
tree | 976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/testsuite/gcc.target/i386/fma4-msubXX.c | |
download | cbb-gcc-4.6.4-upstream.tar.bz2 cbb-gcc-4.6.4-upstream.tar.xz |
obtained gcc-4.6.4.tar.bz2 from upstream website;upstream
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'gcc/testsuite/gcc.target/i386/fma4-msubXX.c')
-rw-r--r-- | gcc/testsuite/gcc.target/i386/fma4-msubXX.c | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/i386/fma4-msubXX.c b/gcc/testsuite/gcc.target/i386/fma4-msubXX.c new file mode 100644 index 000000000..eed75580e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma4-msubXX.c @@ -0,0 +1,134 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma4 } */ +/* { dg-options "-O0 -mfma4" } */ + +#include "fma4-check.h" + +#include <x86intrin.h> +#include <string.h> + +#define NUM 20 + +union +{ + __m128 x[NUM]; + float f[NUM * 4]; + __m128d y[NUM]; + double d[NUM * 2]; +} dst, res, src1, src2, src3; + +/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermdediate + product is not rounded, only the addition is rounded. */ + +static void +init_msubps () +{ + int i; + for (i = 0; i < NUM * 4; i++) + { + src1.f[i] = i; + src2.f[i] = i + 10; + src3.f[i] = i + 20; + } +} + +static void +init_msubpd () +{ + int i; + for (i = 0; i < NUM * 4; i++) + { + src1.d[i] = i; + src2.d[i] = i + 10; + src3.d[i] = i + 20; + } +} + +static int +check_msubps () +{ + int i, j, check_fails = 0; + for (i = 0; i < NUM * 4; i = i + 4) + for (j = 0; j < 4; j++) + { + res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j]; + if (dst.f[i + j] != res.f[i + j]) + check_fails++; + } + return check_fails++; +} + +static int +check_msubpd () +{ + int i, j, check_fails = 0; + for (i = 0; i < NUM * 2; i = i + 2) + for (j = 0; j < 2; j++) + { + res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j]; + if (dst.d[i + j] != res.d[i + j]) + check_fails++; + } + return check_fails++; +} + + +static int +check_msubss () +{ + int i, j, check_fails = 0; + for (i = 0; i < NUM * 4; i = i + 4) + { + res.f[i] = (src1.f[i] * src2.f[i]) - src3.f[i]; + if (dst.f[i] != res.f[i]) + check_fails++; + } + return check_fails++; +} + +static int +check_msubsd () +{ + int i, j, check_fails = 0; + for (i = 0; i < NUM * 2; i = i + 2) + { + res.d[i] = (src1.d[i] * src2.d[i]) - src3.d[i]; + if (dst.d[i] != res.d[i]) + check_fails++; + } + return check_fails++; +} + +static void +fma4_test (void) +{ + int i; + + init_msubps (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_msub_ps (src1.x[i], src2.x[i], src3.x[i]); + + if (check_msubps ()) + abort (); + + for (i = 0; i < NUM; i++) + dst.x[i] = _mm_msub_ss (src1.x[i], src2.x[i], src3.x[i]); + + if (check_msubss ()) + abort (); + + init_msubpd (); + + for (i = 0; i < NUM; i++) + dst.y[i] = _mm_msub_pd (src1.y[i], src2.y[i], src3.y[i]); + + if (check_msubpd ()) + abort (); + + for (i = 0; i < NUM; i++) + dst.y[i] = _mm_msub_sd (src1.y[i], src2.y[i], src3.y[i]); + + if (check_msubsd ()) + abort (); +} |