diff options
Diffstat (limited to 'gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c')
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c new file mode 100644 index 000000000..77232567c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-dpps-1.c @@ -0,0 +1,114 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define lmskN 0x00 +#define lmsk0 0x01 +#define lmsk1 0x02 +#define lmsk2 0x04 +#define lmsk3 0x08 +#define lmsk01 0x03 +#define lmsk02 0x05 +#define lmsk03 0x09 +#define lmsk12 0x06 +#define lmsk13 0x0A +#define lmsk23 0x0C +#define lmskA 0x0F + +#define hmskN 0x00 +#define hmskA 0xF0 +#define hmsk0 0x10 +#define hmsk1 0x20 +#define hmsk2 0x40 +#define hmsk3 0x80 +#define hmsk01 0x30 +#define hmsk02 0x50 +#define hmsk03 0x90 +#define hmsk12 0x60 +#define hmsk13 0xA0 +#define hmsk23 0xC0 + +#ifndef HIMASK +#define HIMASK hmskA +#endif + +static void +TEST (void) +{ + union + { + __m128 x; + float f[4]; + } val1, val2, res[16]; + int masks[16]; + int i, j; + + val1.f[0] = 2.; + val1.f[1] = 3.; + val1.f[2] = 4.; + val1.f[3] = 5.; + + val2.f[0] = 10.; + val2.f[1] = 100.; + val2.f[2] = 1000.; + val2.f[3] = 10000.; + + res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0); + res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1); + res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2); + res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3); + res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01); + res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02); + res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03); + res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12); + res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13); + res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23); + res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0)); + res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1)); + res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2)); + res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3)); + res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN); + res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA); + + masks[0] = HIMASK | lmsk0; + masks[1] = HIMASK | lmsk1; + masks[2] = HIMASK | lmsk2; + masks[3] = HIMASK | lmsk3; + masks[4] = HIMASK | lmsk01; + masks[5] = HIMASK | lmsk02; + masks[6] = HIMASK | lmsk03; + masks[7] = HIMASK | lmsk12; + masks[8] = HIMASK | lmsk13; + masks[9] = HIMASK | lmsk23; + masks[10] = HIMASK | (0x0F & ~lmsk0); + masks[11] = HIMASK | (0x0F & ~lmsk1); + masks[12] = HIMASK | (0x0F & ~lmsk2); + masks[13] = HIMASK | (0x0F & ~lmsk3); + masks[14] = HIMASK | lmskN; + masks[15] = HIMASK | lmskA; + + for (i = 0; i <= 15; i++) + { + float tmp = 0.; + + for (j = 0; j < 4; j++) + if ((HIMASK & (0x10 << j))) + tmp += val1.f[j] * val2.f[j]; + + for (j = 0; j < 4; j++) + if ((masks[i] & (1 << j)) && res[i].f[j] != tmp) + abort (); + } +} |