When testing for computer performance with different internal representation (kind), the code stays more or less the same, except the definition of the tested parameters (kind=1;kind=2). I have tried to build different modules.
module var_1
implicit none
real(8), allocatable :: x(:,:),xi(:,:),xt(:,:)
integer(kind=1), allocatable :: z(:,:)
end module var_1
module var_2
implicit none
real(8), allocatable :: x(:,:),xi(:,:),xt(:,:)
integer(kind=2), allocatable :: z(:,:)
end module var_2
Also there is a global module that defines the parameters that do not change:
module global
integer :: i,j,n,p,nProcessors,s,v,w,infodpotrf,infodpotri,mkl_get_max_threads
integer, dimension(3) :: ni = [100, 1000, 10000], pi = [100, 1000, 10000]
integer, dimension(5) :: nProcessorsi = [1, 2, 4, 6, 12]
real(8):: u,myone= 1.d0,t11,t22
real:: t2,t1
include 'omp_lib.h'
end module global
Than in program part we call subroutines defined later on:
program test
call matrix_multi_inv_1
call matrix_multi_inv_2
end program test
Subroutines:
subroutine matrix_multi_inv_1
use global
use var_1
open (unit=100,file="results.txt",status="unknown")
do s=1,5
nProcessors = nProcessorsi(s)
CALL OMP_SET_NUM_THREADS(nProcessors)
do v=1,3
n=ni(v)
do w=1,3
p=pi(w)
allocate(x(n,n),z(n,p),xi(n,n),xt(n,n))
do i=1,n
do j=1,p
call random_number(u)
z(i,j)=real(floor(u*3),8)
enddo
enddo
1000 format(3(a20),2(i10),2(f15.3),i10)
t11=omp_get_wtime()
call cpu_time(t1)
x=matmul(z,transpose(z))
t22=omp_get_wtime()
call cpu_time(t2)
write(100,1000) 'x_integer_kind_1', 'G_real_8', 'matmul', n, p, t22-t11,t2-t1, mkl_get_max_threads()
deallocate(x,z,xi,xt)
enddo
enddo
enddo
end subroutine matrix_multi_inv_1
subroutine matrix_multi_inv_2
use global
use var_1
open (unit=100,file="results.txt",status="unknown")
do s=1,5
nProcessors = nProcessorsi(s)
CALL OMP_SET_NUM_THREADS(nProcessors)
do v=1,3
n=ni(v)
do w=1,3
p=pi(w)
allocate(x(n,n),z(n,p),xi(n,n),xt(n,n))
do i=1,n
do j=1,p
call random_number(u)
z(i,j)=real(floor(u*3),8)
enddo
enddo
1000 format(3(a20),2(i10),2(f15.3),i10)
t11=omp_get_wtime()
call cpu_time(t1)
x=matmul(z,transpose(z))
t22=omp_get_wtime()
call cpu_time(t2)
write(100,1000) 'x_integer_kind_2', 'G_real_8', 'matmul', n, p, t22-t11,t2-t1, mkl_get_max_threads()
deallocate(x,z,xi,xt)
enddo
enddo
enddo
end subroutine matrix_multi_inv_2
And here comes the problem. Subroutines are exactly the same except for the call module part. I have tried to use a contain statement in the subroutine but this does not work if the inner subroutine is called. Also I have tried to use subroutine with attribute but my compiler reports an error:
A kind type parameter must be a compile-time constant.
Does anyone know a nice solution how to optimize the code. When 10 different variation of different internal representation is tested than this code becomes just too big.