0
votes

I am multiplying two matrices in MPI using a 2D Cartesian topology, and I am getting the following error:

An error occurred in MPI_Type_create_struct
on communicator MPI_COMM_WORLD
MPI_ERR_TYPE: invalid datatype
*** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)
--------------------------------------------------------------------------
mpiexec has exited due to process rank 0 with PID 21294 on
node hpc-nist.nist.local exiting without calling "finalize". This may
have caused other processes in the application to be
terminated by signals sent by mpiexec (as reported here)

I know the error occurs at line 74, but I don't know why; everything seems to be all right.

#include<mpi.h>
#include<stdio.h>
#include<stdlib.h>
#define NUM_ROW_A 225
#define NUM_COL_A 300
#define NUM_ROW_B 300
#define NUM_COL_B 150
int main()
{
    double a[NUM_ROW_A][NUM_COL_A],b[NUM_ROW_B][NUM_COL_B],c[NUM_ROW_A][NUM_COL_B];
    int n[3]={NUM_ROW_A,NUM_COL_A,NUM_COL_B};
    int p[2]={3,2};
    MPI_Comm comm =MPI_COMM_WORLD;
    MPI_Comm comm_2d,comm_1d[2],pcomm;
    int nn[2];
    double aa[3][NUM_COL_A],bb[NUM_COL_A][2],cc[3][2];
    int coords[2];
    int rank;
    int *dispc,*countc;
    int i,j,k;//ierr;
    int periods[2]={0,0};
    int remains[2];
    //int sizeofdouble=sizeof(double);
    MPI_Aint sizeofreal;
    double s_time,f_time;
    MPI_Datatype typea,typec,types[2];
    int blen[2];
    MPI_Aint disp[2];
    MPI_Init(NULL,NULL);
    s_time=MPI_Wtime();
    MPI_Comm_dup(comm,&pcomm);
    MPI_Bcast(n,3,MPI_INT,0,pcomm);
    MPI_Bcast(p,2,MPI_INT,0,pcomm);
    //periods={0,0};
    MPI_Cart_create(pcomm,2,p,periods,0,&comm_2d);
    MPI_Comm_rank(comm_2d,&rank);
    MPI_Cart_coords(comm_2d,rank,2,coords);
    for(i=0;i<2;i++)
    {
        for(j=0;j<2;j++)
            remains[j]=(i==j);
        MPI_Cart_sub(comm_2d,remains,&comm_1d[i]);
    }
    nn[0]=n[0]/p[0];
    nn[1]=n[2]/p[1];
    if(rank==0)
    {
        for(i=0; i<n[0]; i++)
        {
            for(j=0; j<n[1]; j++)
            {
                double randNr =  (rand()/9.9);
                a[i][j] = randNr;
            }
        }
        for(i=0; i<n[1]; i++)
        {
            for(j=0; j<n[2]; j++)
            {
                double randNr =  (rand()/9.9);
                b[i][j] = randNr;
            }
        }
        MPI_Type_vector(n[1],nn[0],n[0],MPI_DOUBLE,&types[0]);
        MPI_Type_extent(MPI_DOUBLE,&sizeofreal);
        disp[0]=0;
        disp[1]=sizeofreal*nn[0];
        blen[0]=1;
        blen[1]=1;
        types[2]=MPI_UB;
        printf("hi%ld\n",disp[1]);
        MPI_Type_struct(2,blen,disp,types,&typea);
        printf("hi\n");
        MPI_Type_commit(&typea);
        MPI_Type_vector(nn[1],nn[0],n[0],MPI_DOUBLE,&types[1]);
        MPI_Type_struct(2,blen,disp,types,&typec);
        MPI_Type_commit(&typec);
        dispc=(int *)malloc(p[0]*p[1]*sizeof(int));
        countc=(int *)malloc(p[0]*p[1]*sizeof(int));
        for(i=0;i<p[0];i++)
        {
            for(j=0;j<p[1];j++)
            {
                dispc[(i-1)*p[1]+j]=((j-1)*p[0]+(i-1)*nn[1]);
                countc[(i-1)*p[1]+j]=1;
            }
        }
        printf("hi\n");
    }
    if(coords[1]==0)
        MPI_Scatter(a,1,typea,aa,nn[0]*n[1],MPI_DOUBLE,0,comm_1d[0]);
    if(coords[0]==0)
        MPI_Scatter(b,n[1]*nn[1],MPI_DOUBLE,bb,n[1]*nn[1],MPI_DOUBLE,0,comm_1d[1]);
    MPI_Bcast(aa,nn[0]*n[1],MPI_DOUBLE,0,comm_1d[1]);
    MPI_Bcast(bb,n[1]*nn[1],MPI_DOUBLE,0,comm_1d[0]);
    for(i=0;i<nn[0];i++)
    {
        for(j=0;j<nn[1];j++)
        {
            cc[i][j]=0.0;
            for(k=0;k<n[1];k++)
                cc[i][j]+=a[i][k]*b[k][j];
        }
    }
    MPI_Gatherv(cc,nn[0]*nn[1],MPI_DOUBLE,c,countc,dispc,typec,0,comm_2d);
    f_time=MPI_Wtime();
    if(rank==0)
    {
        printf("matrix a:\n");
        for(i=0;i<n[0];i++)
        {
            for(j=0;j<n[1];j++)
                printf("%lf\t",a[i][j]);
            printf("\n");
        }
        printf("matrix b:\n");
        for(i=0;i<n[1];i++)
        {
            for(j=0;j<n[2];j++)
                printf("%lf\t",b[i][j]);
            printf("\n");
        }
        printf("matrix c:\n");
        for(i=0;i<n[0];i++)
        {
            for(j=0;j<n[2];j++)
                printf("%lf\t",c[i][j]);
            printf("\n");
        }
        printf("time take = %1.2lf\n",f_time-s_time);
    }
    MPI_Finalize();
    return 0;
}
1
Seriously dude, if you want any help you have to make it easier for SO. Your code has too much indentation, which requires l/r scrolling; dial it back. You state that the error has occurred in line 74; if you've marked that line I can't see it and I'm not about to start counting lines. Finally, you seem to have posted your entire code and invited SO to sort it out. Where's the evidence that you've made any serious effort to sort out your own problem? Evidence such as a minimal, compilable program which exhibits the error you report. – High Performance Mark

1 Answer

3
votes

Your error is not on line 74, but rather on line I_AM_TOO_LAZY_TO_COUNT:

types[2] = MPI_UB;
      ^

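This should most likely read types[1] = MPI_UB; instead. The array is declared as MPI_Datatype types[2], so its valid indices are 0 and 1: writing to types[2] is out of bounds and leaves types[1] uninitialized, which is the invalid datatype that MPI_Type_struct then rejects.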