1
votes

When testing computer performance with different internal representations (kinds), the code stays more or less the same, except for the definition of the tested parameters (kind=1, kind=2). I have tried to build a separate module for each kind.

    module var_1
      ! Benchmark storage, variant 1: the input matrix z is integer(kind=1).
      implicit none

      ! Input matrix; its integer kind is the parameter under test.
      integer(kind=1), allocatable, dimension(:,:) :: z

      ! Real work matrices. In the visible benchmark routines x receives
      ! matmul(z, transpose(z)); xi and xt are only allocated and deallocated.
      real(kind=8), allocatable, dimension(:,:) :: x
      real(kind=8), allocatable, dimension(:,:) :: xi
      real(kind=8), allocatable, dimension(:,:) :: xt
    end module var_1

    module var_2
      ! Benchmark storage, variant 2: the input matrix z is integer(kind=2).
      implicit none

      ! Input matrix; its integer kind is the parameter under test.
      integer(kind=2), allocatable, dimension(:,:) :: z

      ! Real work matrices with the same names and shapes as in the other
      ! variant, so the benchmark code can switch variants via the use statement.
      real(kind=8), allocatable, dimension(:,:) :: x
      real(kind=8), allocatable, dimension(:,:) :: xi
      real(kind=8), allocatable, dimension(:,:) :: xt
    end module var_2

Also there is a global module that defines the parameters that do not change:

    module global
      ! Shared, kind-independent state for the benchmark routines: loop counters,
      ! problem sizes, thread counts and timing variables.
      implicit none

      ! Loop counters, current problem dimensions (n, p), current thread count,
      ! and two status integers (infodpotrf, infodpotri) that the visible
      ! routines do not use.
      integer :: i, j, n, p, nProcessors, s, v, w, infodpotrf, infodpotri

      ! mkl_get_max_threads is referenced as a function (with parentheses) by the
      ! benchmark routines, so it is declared as an external integer function
      ! rather than as a plain module variable.
      integer, external :: mkl_get_max_threads

      ! Problem sizes to sweep: ni supplies n, pi supplies p (pi is a list of
      ! sizes here, not the mathematical constant).
      integer, dimension(3) :: ni = [100, 1000, 10000], pi = [100, 1000, 10000]

      ! Thread counts to sweep.
      integer, dimension(5) :: nProcessorsi = [1, 2, 4, 6, 12]

      ! u: scratch random number; t11/t22: wall-clock stamps from omp_get_wtime.
      real(8) :: u, myone = 1.d0, t11, t22
      ! t1/t2: CPU-time stamps from cpu_time.
      real :: t2, t1

      ! OpenMP declarations (omp_get_wtime, omp_set_num_threads, ...).
      include 'omp_lib.h'
    end module global

Then, in the program part, we call the subroutines defined later on:

   program test
     ! Driver: runs each benchmark variant once, in order.
     implicit none

     ! The benchmark routines are external procedures (they are not module
     ! procedures), so they are named here explicitly.
     external :: matrix_multi_inv_1, matrix_multi_inv_2

     call matrix_multi_inv_1
     call matrix_multi_inv_2
   end program test

Subroutines:

    subroutine matrix_multi_inv_1
      ! Benchmark x = matmul(z, transpose(z)) with z stored in the integer kind
      ! provided by module var_1.
      !
      ! For every combination of thread count (nProcessorsi), n (ni) and p (pi)
      ! the routine fills an n-by-p matrix z with random values 0, 1 or 2, times
      ! the product with both omp_get_wtime (wall clock) and cpu_time, and
      ! appends one row to unit 100 ("results.txt").
      !
      ! All counters, sizes and timers are module variables from `global`; the
      ! arrays come from `var_1`. The routine takes no arguments.
      use global
      use var_1
      implicit none

      ! Layout of one result row: three labels, n, p, wall time, CPU time, and
      ! the value returned by mkl_get_max_threads().
      character(len=*), parameter :: row_fmt = '(3(a20),2(i10),2(f15.3),i10)'

      ! The unit is not closed in this routine.
      ! NOTE(review): a later open of the same file after a close would start at
      ! a processor-dependent position, so confirm before adding a close here.
      open (unit=100, file="results.txt", status="unknown")

      do s = 1, size(nProcessorsi)
        nProcessors = nProcessorsi(s)
        call omp_set_num_threads(nProcessors)

        do v = 1, size(ni)
          n = ni(v)

          do w = 1, size(pi)
            p = pi(w)

            ! xi and xt are allocated alongside x and z but are not otherwise
            ! used in this routine.
            allocate(x(n,n), z(n,p), xi(n,n), xt(n,n))

            ! Fill z column by column (first index innermost) to match
            ! Fortran's column-major storage. u lies in [0,1), so floor(u*3)
            ! is 0, 1 or 2; it is produced directly in z's kind.
            do j = 1, p
              do i = 1, n
                call random_number(u)
                z(i,j) = floor(u*3, kind=kind(z))
              end do
            end do

            ! NOTE(review): matmul of two integer arrays yields z's integer
            ! kind before the conversion to real(8); for large p the sums can
            ! exceed the range of a small integer kind. Confirm that only the
            ! timing, not the numerical result, matters here.
            t11 = omp_get_wtime()
            call cpu_time(t1)
            x = matmul(z, transpose(z))
            t22 = omp_get_wtime()
            call cpu_time(t2)

            write(100, row_fmt) 'x_integer_kind_1', 'G_real_8', 'matmul', n, p, &
                                t22-t11, t2-t1, mkl_get_max_threads()

            deallocate(x, z, xi, xt)
          end do
        end do
      end do
    end subroutine matrix_multi_inv_1

    subroutine matrix_multi_inv_2
      ! Benchmark x = matmul(z, transpose(z)) with z stored in the integer kind
      ! provided by module var_2 (integer(kind=2)), matching the
      ! 'x_integer_kind_2' label written to the results file.
      !
      ! For every combination of thread count (nProcessorsi), n (ni) and p (pi)
      ! the routine fills an n-by-p matrix z with random values 0, 1 or 2, times
      ! the product with both omp_get_wtime (wall clock) and cpu_time, and
      ! appends one row to unit 100 ("results.txt").
      !
      ! All counters, sizes and timers are module variables from `global`; the
      ! arrays come from `var_2`. The routine takes no arguments.
      use global
      use var_2
      implicit none

      ! Layout of one result row: three labels, n, p, wall time, CPU time, and
      ! the value returned by mkl_get_max_threads().
      character(len=*), parameter :: row_fmt = '(3(a20),2(i10),2(f15.3),i10)'

      ! The unit is not closed in this routine.
      ! NOTE(review): a later open of the same file after a close would start at
      ! a processor-dependent position, so confirm before adding a close here.
      open (unit=100, file="results.txt", status="unknown")

      do s = 1, size(nProcessorsi)
        nProcessors = nProcessorsi(s)
        call omp_set_num_threads(nProcessors)

        do v = 1, size(ni)
          n = ni(v)

          do w = 1, size(pi)
            p = pi(w)

            ! xi and xt are allocated alongside x and z but are not otherwise
            ! used in this routine.
            allocate(x(n,n), z(n,p), xi(n,n), xt(n,n))

            ! Fill z column by column (first index innermost) to match
            ! Fortran's column-major storage. u lies in [0,1), so floor(u*3)
            ! is 0, 1 or 2; it is produced directly in z's kind.
            do j = 1, p
              do i = 1, n
                call random_number(u)
                z(i,j) = floor(u*3, kind=kind(z))
              end do
            end do

            ! NOTE(review): matmul of two integer arrays yields z's integer
            ! kind before the conversion to real(8); for large p the sums can
            ! exceed the range of a small integer kind. Confirm that only the
            ! timing, not the numerical result, matters here.
            t11 = omp_get_wtime()
            call cpu_time(t1)
            x = matmul(z, transpose(z))
            t22 = omp_get_wtime()
            call cpu_time(t2)

            write(100, row_fmt) 'x_integer_kind_2', 'G_real_8', 'matmul', n, p, &
                                t22-t11, t2-t1, mkl_get_max_threads()

            deallocate(x, z, xi, xt)
          end do
        end do
      end do
    end subroutine matrix_multi_inv_2

And here comes the problem. The subroutines are exactly the same except for the `use` statement that selects the module. I have tried to use a `contains` statement in the subroutine, but this does not work if the inner subroutine is called. I have also tried to use a subroutine with an attribute, but my compiler reports an error:

A kind type parameter must be a compile-time constant.

Does anyone know a nice solution for optimizing the code? When 10 different variations of the internal representation are tested, this code becomes just too big.

2

2 Answers

0
votes

A similar problem is usually solved with poor man's templates using `include`. You move the common part to another file and then just do

subroutine matrix_multi_inv_1
  ! Variant 1: pick up the declarations from var_1, then splice in the shared
  ! body kept in common.f90.
  use var_1
  include "common.f90"
end subroutine matrix_multi_inv_1

subroutine matrix_multi_inv_2
  ! Variant 2: pick up the declarations from var_2, then splice in the shared
  ! body kept in common.f90.
  use var_2
  include "common.f90"
end subroutine matrix_multi_inv_2

The C preprocessor can be used for more power.

0
votes

Why not move the relevant code out into a file, print_huge.inc.F90, to be included in the modules:

! Include fragment: there is deliberately no module statement here. The
! including module must define the integer constant mykind before the include.
interface print_huge
  module procedure print_huge
end interface print_huge

contains

  ! Print huge(a), i.e. the largest number representable in real(kind=mykind).
  subroutine print_huge(a)
    ! Only the kind of a is inspected by huge().
    real(kind=mykind), intent(in) :: a

    print *, huge(a)
  end subroutine print_huge

! End of fragment: the including module supplies its own end module statement.

Then you can include this into different modules print_huge_N:

module print_huge_4
  ! Instantiates the shared print_huge code for 32-bit reals.
  ! real32 is the standard named constant for this kind (equal to 4 on the
  ! common compilers), used instead of a hard-coded kind number.
  use, intrinsic :: iso_fortran_env, only: real32
  implicit none

  ! Kind consumed by the included code; must be defined before the include.
  integer, parameter :: mykind = real32

  include 'print_huge.inc.F90'
end module print_huge_4

module print_huge_8
  ! Instantiates the shared print_huge code for 64-bit reals.
  ! real64 is the standard named constant for this kind (equal to 8 on the
  ! common compilers), used instead of a hard-coded kind number.
  use, intrinsic :: iso_fortran_env, only: real64
  implicit none

  ! Kind consumed by the included code; must be defined before the include.
  integer, parameter :: mykind = real64

  include 'print_huge.inc.F90'
end module print_huge_8

module print_huge_16
  ! Instantiates the shared print_huge code for 128-bit reals.
  ! real128 is the standard named constant for this kind (equal to 16 on the
  ! common compilers), used instead of a hard-coded kind number.
  use, intrinsic :: iso_fortran_env, only: real128
  implicit none

  ! Kind consumed by the included code; must be defined before the include.
  integer, parameter :: mykind = real128

  include 'print_huge.inc.F90'
end module print_huge_16

Note that each module defines its own mykind!

For convenience you can make use of the interface defined to bundle the modules into one "super module" (inspired by the example in the book of Arjen Markus):

module print_huge_all
  ! Bundles the per-kind modules so that a single use statement gives access
  ! to the generic print_huge for every supported real kind.
  !
  ! Only print_huge is imported: each per-kind module also defines its own
  ! mykind, and re-exporting three different constants under one name would
  ! make that name unusable for anyone using this module.
  use print_huge_4,  only: print_huge
  use print_huge_8,  only: print_huge
  use print_huge_16, only: print_huge
  implicit none
end module print_huge_all

Then your main application would simply look like:

program huge_program
  ! Demonstrates the generic print_huge: the kind of the actual argument
  ! selects which per-kind implementation is called.
  use print_huge_all, only: print_huge
  use, intrinsic :: iso_fortran_env, only: real32, real64, real128
  implicit none

  ! The variables are never assigned; print_huge only inspects their kind.
  real(kind=real32)  :: a1
  real(kind=real64)  :: a2
  real(kind=real128) :: a3

  call print_huge(a1)
  call print_huge(a2)
  call print_huge(a3)
end program huge_program

With the following output:

./a.out 
   3.40282347E+38
   1.7976931348623157E+308
   1.18973149535723176508575932662800702E+4932

The subroutine resides in the include file and does not need to be adjusted for each kind. Of course, you could also access the individual modules directly and/or "rename" the subroutines using the => operator.