summary | refs | log | tree | commit | diff
path: root/libgomp/testsuite/libgomp.fortran/strassen.f90
diff options
context:
space:
mode:
author: upstream source tree <ports@midipix.org> 2015-03-15 20:14:05 -0400
committer: upstream source tree <ports@midipix.org> 2015-03-15 20:14:05 -0400
commit: 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree: 976dc5ab7fddf506dadce60ae936f43f58787092 /libgomp/testsuite/libgomp.fortran/strassen.f90
download: cbb-gcc-4.6.4-upstream.tar.bz2
cbb-gcc-4.6.4-upstream.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website; [upstream]
verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository.
Diffstat (limited to 'libgomp/testsuite/libgomp.fortran/strassen.f90')
-rw-r--r-- libgomp/testsuite/libgomp.fortran/strassen.f90 75
1 files changed, 75 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.fortran/strassen.f90 b/libgomp/testsuite/libgomp.fortran/strassen.f90
new file mode 100644
index 000000000..b44982665
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/strassen.f90
@@ -0,0 +1,75 @@
+! { dg-options "-O2" }
+
+! Driver for the Strassen test.  Two random N x N matrices are multiplied
+! three ways: with the MATMUL intrinsic (reference result C), with a plain
+! call to the internal STRASSEN routine, and with STRASSEN called from one
+! thread of an OpenMP parallel region.  The wall-clock time of each run is
+! printed, and the program stops with a non-zero code if a Strassen result
+! differs from the reference by more than max_err.
+program strassen_matmul
+  use omp_lib
+  implicit none
+  integer, parameter :: N = 1024
+  ! Upper bound accepted for sqrt (sum ((C - D) ** 2)) / N.
+  double precision, parameter :: max_err = 0.1
+  double precision, save :: A(N,N), B(N,N), C(N,N), D(N,N)
+  double precision :: start, finish
+
+  call random_seed
+  call random_number (A)
+  call random_number (B)
+
+  ! Reference product.
+  start = omp_get_wtime ()
+  C = matmul (A, B)
+  finish = omp_get_wtime ()
+  write(*,'(a, f10.6)') ' Time for matmul = ', finish - start
+
+  ! STRASSEN called outside of any parallel region.
+  D = 0
+  start = omp_get_wtime ()
+  call strassen (A, B, D, N)
+  finish = omp_get_wtime ()
+  write(*,'(a, f10.6)') ' Time for Strassen = ', finish - start
+  ! STOP with a distinct non-zero code (rather than the non-standard ABORT
+  ! intrinsic) so that the exit status tells which check failed.
+  if (sqrt (sum ((C - D) ** 2)) / N .gt. max_err) stop 1
+
+  ! STRASSEN called by a single thread of a parallel region.  NOWAIT drops
+  ! the barrier at the end of the SINGLE construct, so the remaining
+  ! threads go straight on to the end of the parallel region.
+  D = 0
+  start = omp_get_wtime ()
+!$omp parallel
+!$omp single
+  call strassen (A, B, D, N)
+!$omp end single nowait
+!$omp end parallel
+  finish = omp_get_wtime ()
+  write(*,'(a, f10.6)') ' Time for Strassen MP = ', finish - start
+  if (sqrt (sum ((C - D) ** 2)) / N .gt. max_err) stop 2
+
+contains
+
+  ! Compute C = A * B for N x N matrices with Strassen's seven-product
+  ! scheme.
+  !
+  !   A, B  (in)   factors, N x N
+  !   C     (out)  product, N x N
+  !   N     (in)   order of the matrices
+  !
+  ! When N is odd or smaller than 64 the product is formed directly with
+  ! MATMUL.  Otherwise the matrices are split into four half-size quadrants,
+  ! the seven half-size products are each computed by a recursive call
+  ! wrapped in an OpenMP task, and C is assembled from them once TASKWAIT
+  ! has seen all seven child tasks complete.
+  recursive subroutine strassen (A, B, C, N)
+    integer, intent(in) :: N
+    double precision, intent(in) :: A(N,N), B(N,N)
+    double precision, intent(out) :: C(N,N)
+    ! P(:,:,i) receives the i-th of the seven half-size products.
+    double precision :: P(N/2,N/2,7)
+    ! half is the quadrant order; lo is the first index of the second half.
+    integer :: half, lo
+
+    if (mod (N, 2) /= 0 .or. N < 64) then
+      C = matmul (A, B)
+    else
+      half = N / 2
+      lo = N / 2 + 1
+!$omp task shared (A, B, P)
+      call strassen (A(:half,:half) + A(lo:,lo:), &
+                     B(:half,:half) + B(lo:,lo:), P(:,:,1), half)
+!$omp end task
+!$omp task shared (A, B, P)
+      call strassen (A(lo:,:half) + A(lo:,lo:), &
+                     B(:half,:half), P(:,:,2), half)
+!$omp end task
+!$omp task shared (A, B, P)
+      call strassen (A(:half,:half), &
+                     B(:half,lo:) - B(lo:,lo:), P(:,:,3), half)
+!$omp end task
+!$omp task shared (A, B, P)
+      call strassen (A(lo:,lo:), &
+                     B(lo:,:half) - B(:half,:half), P(:,:,4), half)
+!$omp end task
+!$omp task shared (A, B, P)
+      call strassen (A(:half,:half) + A(:half,lo:), &
+                     B(lo:,lo:), P(:,:,5), half)
+!$omp end task
+!$omp task shared (A, B, P)
+      call strassen (A(lo:,:half) - A(:half,:half), &
+                     B(:half,:half) + B(:half,lo:), P(:,:,6), half)
+!$omp end task
+!$omp task shared (A, B, P)
+      call strassen (A(:half,lo:) - A(lo:,lo:), &
+                     B(lo:,:half) + B(lo:,lo:), P(:,:,7), half)
+!$omp end task
+!$omp taskwait
+      ! Combine the products into the four quadrants of C.
+      C(lo:,lo:) = P(:,:,1) - P(:,:,2) + P(:,:,3) + P(:,:,6)
+      C(:half,lo:) = P(:,:,3) + P(:,:,5)
+      C(lo:,:half) = P(:,:,2) + P(:,:,4)
+      C(:half,:half) = P(:,:,1) + P(:,:,4) - P(:,:,5) + P(:,:,7)
+    end if
+  end subroutine strassen
+end