summary | refs | log | tree | commit | diff
path: root/gcc/testsuite/gcc.target/mips/mips-3d-9.c
diff options
context:
space:
mode:
author: upstream source tree <ports@midipix.org> 2015-03-15 20:14:05 -0400
committer: upstream source tree <ports@midipix.org> 2015-03-15 20:14:05 -0400
commit: 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree: 976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/testsuite/gcc.target/mips/mips-3d-9.c
download: cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
download: cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;upstream
verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository.
Diffstat (limited to 'gcc/testsuite/gcc.target/mips/mips-3d-9.c')
-rw-r--r-- gcc/testsuite/gcc.target/mips/mips-3d-9.c 158
1 files changed, 158 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/mips/mips-3d-9.c b/gcc/testsuite/gcc.target/mips/mips-3d-9.c
new file mode 100644
index 000000000..3875391b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/mips-3d-9.c
@@ -0,0 +1,158 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mips3d" } */
+
+/* Matrix Multiplications */
+#include <stdlib.h>
+#include <stdio.h>
+
+typedef float v2sf __attribute__((vector_size(8))); /* 8-byte vector; built from two floats below */
+/* Inputs: every version below computes result[i] = sum over j of a[j] * b[j][i]. */
+float a[4] = {1.1, 2.2, 3.3, 4.4};
+float b[4][4] = {{1, 2, 3, 4},
+ {5, 6, 7, 8},
+ {9, 10, 11, 12},
+ {13, 14, 15, 16}};
+/* Outputs, one array per implementation; main() requires d, e and f to equal c. */
+float c[4]; /* Result for matrix_multiply1() */
+float d[4]; /* Result for matrix_multiply2() */
+float e[4]; /* Result for matrix_multiply3() */
+float f[4]; /* Result for matrix_multiply4() */
+/* NOTE(review): NOMIPS16 is not defined in this file; presumably supplied by the test harness -- confirm. */
+void matrix_multiply1();
+NOMIPS16 void matrix_multiply2();
+NOMIPS16 void matrix_multiply3();
+NOMIPS16 void matrix_multiply4();
+
+int main ()
+{
+ int i;
+ /* Driver: run all four versions; abort() if d, e or f differs from c. */
+ /* Version 1. Use float calculations; c[] becomes the reference result */
+ matrix_multiply1();
+
+ /* Version 2. Use paired-single instructions inside the inner loop */
+ matrix_multiply2();
+ for (i = 0; i < 4; i++)
+ if (d[i] != c[i]) /* compared with !=, i.e. no tolerance */
+ abort();
+
+ /* Version 3. Use paired-single instructions and unroll the inner loop */
+ matrix_multiply3();
+ for (i = 0; i < 4; i++)
+ if (e[i] != c[i])
+ abort();
+
+ /* Version 4. Use paired-single instructions and unroll all loops */
+ matrix_multiply4();
+ for (i = 0; i < 4; i++)
+ if (f[i] != c[i])
+ abort();
+ /* Reached only if none of the checks above called abort() */
+ printf ("Test Passes\n");
+ exit (0);
+}
+
+void matrix_multiply1()
+{
+ int i, j;
+ /* Version 1: scalar code; c[i] = sum over j of a[j] * b[j][i], for i = 0..3. */
+ for (i = 0; i < 4; i++)
+ {
+ c[i] = 0.0; /* restart the accumulation for this output element */
+
+ for (j = 0; j < 4; j ++)
+ c[i] += a[j] * b[j][i]; /* walks down column i of b */
+ }
+}
+
+NOMIPS16 void matrix_multiply2()
+{
+ int i, j;
+ v2sf m1, m2;
+ v2sf result, temp;
+ /* Version 2: same product as matrix_multiply1(), two terms per step; writes d[]. */
+ for (i = 0; i < 4; i++)
+ {
+ result = (v2sf) {0.0, 0.0};
+
+ for (j = 0; j < 4; j+=2)
+ {
+ /* Load a[j], a[j+1] into m1 and the matching entries of column i of b into m2 */
+ m1 = (v2sf) {a[j], a[j+1]};
+ m2 = (v2sf) {b[j][i], b[j+1][i]};
+
+ /* Multiply and add: each half of result accumulates its own partial sum */
+ result += m1 * m2;
+ }
+
+ /* Reduction add at the end: result is passed as both operands */
+ temp = __builtin_mips_addr_ps (result, result);
+ d[i] = __builtin_mips_cvt_s_pl (temp); /* lower element of the pair, as a float */
+ }
+}
+
+NOMIPS16 void matrix_multiply3()
+{
+ int i;
+ v2sf m1, m2, n1, n2;
+ v2sf result, temp;
+ /* Version 3: inner loop unrolled; a[] is packed once, before the loop; writes e[]. */
+ m1 = (v2sf) {a[0], a[1]};
+ m2 = (v2sf) {a[2], a[3]};
+
+ for (i = 0; i < 4; i++)
+ {
+ n1 = (v2sf) {b[0][i], b[1][i]}; /* rows 0-1 of column i */
+ n2 = (v2sf) {b[2][i], b[3][i]}; /* rows 2-3 of column i */
+
+ /* Multiply and add: all four products for column i, two per half */
+ result = m1 * n1 + m2 * n2;
+
+ /* Reduction add at the end: result is passed as both operands */
+ temp = __builtin_mips_addr_ps (result, result);
+ e[i] = __builtin_mips_cvt_s_pl (temp); /* lower element of the pair, as a float */
+ }
+}
+
+NOMIPS16 void matrix_multiply4()
+{
+ v2sf m1, m2;
+ v2sf n1, n2, n3, n4, n5, n6, n7, n8;
+ v2sf temp1, temp2, temp3, temp4;
+ v2sf result1, result2;
+ /* Version 4: every loop unrolled; computes all four outputs and writes f[]. */
+ /* Load a[0] a[1] values into m1
+ Load a[2] a[3] values into m2 */
+ m1 = (v2sf) {a[0], a[1]};
+ m2 = (v2sf) {a[2], a[3]};
+
+ /* Load b[0][0] b[1][0] values into n1
+ Load b[2][0] b[3][0] values into n2
+ Load b[0][1] b[1][1] values into n3
+ Load b[2][1] b[3][1] values into n4
+ Load b[0][2] b[1][2] values into n5
+ Load b[2][2] b[3][2] values into n6
+ Load b[0][3] b[1][3] values into n7
+ Load b[2][3] b[3][3] values into n8 */
+ n1 = (v2sf) {b[0][0], b[1][0]};
+ n2 = (v2sf) {b[2][0], b[3][0]};
+ n3 = (v2sf) {b[0][1], b[1][1]};
+ n4 = (v2sf) {b[2][1], b[3][1]};
+ n5 = (v2sf) {b[0][2], b[1][2]};
+ n6 = (v2sf) {b[2][2], b[3][2]};
+ n7 = (v2sf) {b[0][3], b[1][3]};
+ n8 = (v2sf) {b[2][3], b[3][3]};
+ /* temp1..temp4: products for columns 0..3 of b, still split across the two halves */
+ temp1 = m1 * n1 + m2 * n2;
+ temp2 = m1 * n3 + m2 * n4;
+ temp3 = m1 * n5 + m2 * n6;
+ temp4 = m1 * n7 + m2 * n8;
+ /* Each reduction add takes two columns: temp1/temp2, then temp3/temp4 */
+ result1 = __builtin_mips_addr_ps (temp1, temp2);
+ result2 = __builtin_mips_addr_ps (temp3, temp4);
+ /* Upper half of each result goes to the even index, lower half to the odd index */
+ f[0] = __builtin_mips_cvt_s_pu (result1);
+ f[1] = __builtin_mips_cvt_s_pl (result1);
+ f[2] = __builtin_mips_cvt_s_pu (result2);
+ f[3] = __builtin_mips_cvt_s_pl (result2);
+}