I am rewriting a numerical simulation code that is parallelized using MPI in one direction. So far, the arrays containing the data were saved by the master MPI process, which implied transferring the data from all MPI processes to one and allocate huge arrays to store the whole thing. It is not very efficient nor classy, and is a problem for large resolutions.
I am therefore trying to use MPI-IO to write directly the file from the distributed arrays. One of the constraint I have is that the written file needs to respect the fortran "unformatted" format (i.e. 4 bytes integer before and after each field indicating its size).
I wrote a simple test program, that works when I write only one distributed array to the file. However, when I write several arrays, the total size of the file is wrong and when comparing to the equivalent fortran 'unformatted' file, the files are different.
Here is the sample code :
module arrays_dim
implicit none
INTEGER, PARAMETER :: dp = kind(0.d0)
integer, parameter :: imax = 500
integer, parameter :: jmax = 50
integer, parameter :: kmax = 10
end module arrays_dim
module mpi_vars
use mpi
implicit none
integer, save :: ierr, myID, numprocs
integer, save :: i_start, i_end, i_mean, i_loc
integer, save :: subArray, fileH
integer(MPI_OFFSET_KIND), save :: offset, currPos
end module mpi_vars
program test
use mpi
use arrays_dim
use mpi_vars
real(dp), dimension(0:imax,0:jmax+1,0:kmax+1) :: v, w
real(dp), dimension(:,:,:), allocatable :: v_loc, w_loc
integer :: i, j, k
call MPI_INIT(ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, myID, ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)
i_mean = (imax+1)/numprocs
i_start = myID*i_mean
i_end = i_start+i_mean-1
if(i_mean*numprocs<imax+1) then
if(myID == numprocs-1) i_end = imax
endif
i_loc = i_end - i_start + 1
allocate(v_loc(i_start:i_end,0:jmax+1,0:kmax+1))
allocate(w_loc(i_start:i_end,0:jmax+1,0:kmax+1))
print*, 'I am:', myID, i_start, i_end, i_loc
do k=0,kmax+1
do j=0,jmax+1
do i=0,imax
v(i,j,k) = i+j+k
w(i,j,k) = i*j*k
enddo
enddo
enddo
if(myID==0) then
open(10,form='unformatted')
write(10) v
!write(10) w
close(10)
endif
do k=0,kmax+1
do j=0,jmax+1
do i=i_start,i_end
v_loc(i,j,k) = i+j+k
w_loc(i,j,k) = i*j*k
enddo
enddo
enddo
call MPI_Type_create_subarray (3, [imax+1, jmax+2, kmax+2], [i_loc, jmax+2, kmax+2], &
[i_start, 0, 0], &
MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, subArray, ierr)
call MPI_Type_commit(subArray, ierr)
call MPI_File_open(MPI_COMM_WORLD, 'mpi.dat', &
MPI_MODE_WRONLY + MPI_MODE_CREATE + MPI_MODE_APPEND, &
MPI_INFO_NULL, fileH, ierr )
call saveMPI(v_loc, (i_loc)*(jmax+2)*(kmax+2))
!call saveMPI(w_loc, (i_loc)*(jmax+2)*(kmax+2))
call MPI_File_close(fileH, ierr)
deallocate(v_loc,w_loc)
call MPI_FINALIZE(ierr)
end program test
!
subroutine saveMPI(array, n)
use mpi
use arrays_dim
use mpi_vars
implicit none
real(dp), dimension(n) :: array
integer :: n
offset = (imax+1)*(jmax+2)*(kmax+2)*8
if(myID==0) then
call MPI_File_seek(fileH, int(0,MPI_OFFSET_KIND), MPI_SEEK_CUR, ierr)
call MPI_File_write(fileH, [(imax+1)*(jmax+2)*(kmax+2)*8], 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
call MPI_File_seek(fileH, offset, MPI_SEEK_CUR, ierr)
call MPI_File_write(fileH, [(imax+1)*(jmax+2)*(kmax+2)*8], 1, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
endif
call MPI_File_set_view(fileH, int(4,MPI_OFFSET_KIND), MPI_DOUBLE_PRECISION, subArray, 'native', MPI_INFO_NULL, ierr)
call MPI_File_write_all(fileH, array, (i_loc)*(jmax+2)*(kmax+2), MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
end subroutine saveMPI
when the lines !write(10) w
and !call saveMPI(w_loc, (i_loc)*(jmax+2)*(kmax+2))
are commented (i.e. I only write the v array), the code is working fine :
mpif90.openmpi -O3 -o prog main.f90
mpirun.openmpi -np 4 ./prog
cmp mpi.dat fort.10
cmp does not generate an output, so the files are identical. If however I uncomment these lines, then the resulting files (mpi.dat and fort.10) are different. I am sure that the problem lies in the way I define the offset I use to write the data at the right position on the file, but I do not know how to indicate to the second call of saveMPI that the initial position should be the end of the file. What am I missing ?