Allgather can be a little confusing the first time you see it. There are a few things going on here.
First, the counts passed to allgather -- the send count and the recv count -- are the amount of data sent by each process and the amount received from each process, not the total size of the receive buffer.
Second, the way allgather works is that it concatenates the sent data. So if you have
int send[3];
int recv[9];
With the send arrays on each process looking like this:
send:
+---+---+---+
| 0 | 0 | 0 | rank 0
+---+---+---+
+---+---+---+
| 1 | 1 | 1 | rank 1
+---+---+---+
+---+---+---+
| 2 | 2 | 2 | rank 2
+---+---+---+
Then a call to
MPI_Allgather(send, 3, MPI_INT, recv, 3, MPI_INT, MPI_COMM_WORLD);
would result in:
recv:
+---+---+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 1 | 1 | 2 | 2 | 2 |
+---+---+---+---+---+---+---+---+---+
So a version of your code which pulls out the right data is:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
/*
 * Per-rank payload for the whole-struct gather example.
 * The tag `struct mystruct` and the alias `struct_t` name the same type.
 */
typedef struct mystruct {
    int  sendarray[10];   /* main marks slot [rank] with rank*10 */
    int  a;               /* main stores the sending rank here */
    char array2[10];      /* main marks slot [rank] with rank*20 */
} struct_t;
/*
 * Gather one complete struct_t from every rank onto every rank, then have
 * rank 0 print everything it received.
 *
 * Each rank fills in a zeroed struct_t, marks slot [rank] of both arrays and
 * stores its rank in `a`.  The struct is shipped as sizeof(struct_t) raw
 * bytes, so MPI_Allgather concatenates the contributions into `recv`, which
 * therefore ends up laid out as an array of `size` structs indexed by rank.
 *
 * Returns 0 on success; aborts the MPI job if a buffer cannot be allocated.
 */
int main (int argc, char ** argv)
{
    int rank, size;
    int slots;
    struct_t *fd;
    struct_t *recv;
    int i, j;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /*
     * calloc zeroes both arrays and any padding bytes; every byte of the
     * struct is transmitted below, so none of it should be left unset.
     */
    fd = calloc(1, sizeof *fd);
    recv = malloc((size_t)size * sizeof *recv);
    if (fd == NULL || recv == NULL) {
        fprintf(stderr, "rank %d: out of memory\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* sendarray and array2 are declared with the same number of elements */
    slots = (int)(sizeof fd->sendarray / sizeof fd->sendarray[0]);

    fd->a = rank;
    if (rank < slots) {
        fd->sendarray[rank] = rank*10;
        fd->array2[rank] = (char)(rank*20);
        /* index with rank: the loop counter is not a valid subscript here */
        printf("My rank is %d, fd->sendarray[%d] is %d\n", rank, rank, fd->sendarray[rank]);
    } else {
        /* ranks beyond the array length have no slot to mark; they send zeros */
        printf("My rank is %d, no array slot to mark\n", rank);
    }

    /* counts are per process: one struct's worth of bytes sent and received per rank */
    MPI_Allgather (fd, (int)sizeof *fd, MPI_BYTE, recv, (int)sizeof *recv, MPI_BYTE, MPI_COMM_WORLD);

    if (rank == 0) {
        printf("Received:\n");
        for (i = 0; i < size; i++) {
            printf("---\n");
            printf("int array: ");
            for (j = 0; j < slots; j++) {
                printf("%3d ", recv[i].sendarray[j]);
            }
            printf("\nint: "); printf("%3d\n", recv[i].a);
            printf("char array: ");
            for (j = 0; j < slots; j++) {
                printf("%3d ", (int)(recv[i].array2[j]));
            }
            printf("\n");
        }
    }

    free(recv);
    free(fd);
    MPI_Finalize();
    return 0;
}
Note that it gathers those structures into the equivalent of an array of those structures. Running with 4 processes gives:
My rank is 0, fd->sendarray[10] is 0
My rank is 1, fd->sendarray[10] is 1
My rank is 2, fd->sendarray[10] is 2
My rank is 3, fd->sendarray[10] is 3
Received:
---
int array: 0 0 0 0 0 0 0 0 0 0
int: 0
char array: 0 0 0 0 0 0 0 0 0 0
---
int array: 0 10 0 0 0 0 0 0 0 0
int: 1
char array: 0 20 0 0 0 0 0 0 0 0
---
int array: 0 0 20 0 0 0 0 0 0 0
int: 2
char array: 0 0 40 0 0 0 0 0 0 0
---
int array: 0 0 0 30 0 0 0 0 0 0
int: 3
char array: 0 0 0 60 0 0 0 0 0 0
If you really want just the corresponding elements gathered, then you'd simply send one int/char from that particular location in the structure:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
/*
 * Container used by the element-wise gather example.
 * The tag `struct mystruct` and the alias `struct_t` name the same type.
 */
typedef struct mystruct {
    int  sendarray[10];   /* main sends only element [rank] of this array */
    int  a;               /* not gathered in this example */
    char array2[10];      /* main sends only element [rank] of this array */
} struct_t;
/*
 * Gather a single int and a single char from every rank into the first
 * `size` slots of recv.sendarray and recv.array2, then print them on rank 0.
 *
 * Each rank contributes element [rank] of its own arrays.  The receive
 * arrays are pre-filled with 999 / 99 so that untouched slots are visible
 * in the output.
 *
 * Returns 0 on success, or EXIT_FAILURE when the job has more processes
 * than the arrays have slots.
 */
int main (int argc, char ** argv)
{
    int rank, size;
    int slots;
    struct_t fd;
    struct_t recv;
    int i, j;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* sendarray and array2 are declared with the same number of elements */
    slots = (int)(sizeof recv.sendarray / sizeof recv.sendarray[0]);

    /*
     * The gathers below write one element per process into fixed-size
     * arrays, and each rank indexes its arrays with its own rank, so the
     * process count must not exceed the array length.  Every rank sees the
     * same `size`, so all of them leave together.
     */
    if (size > slots) {
        if (rank == 0) {
            fprintf(stderr, "This example supports at most %d processes, got %d\n", slots, size);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    for (i = 0; i < slots; i++) {
        fd.sendarray[i] = 0;
        fd.array2[i] = 0;
        recv.sendarray[i] =999;
        recv.array2[i] = 99;
    }
    recv.a =999;

    fd.sendarray[rank] = rank*10;
    fd.array2[rank] = (char)(rank*20);
    fd.a = rank;
    printf("My rank is %d, fd.sendarray[%d] is %d\n", rank, rank, fd.sendarray[rank]);

    /* one element sent per rank, one element received from each rank */
    MPI_Allgather (&(fd.sendarray[rank]), 1, MPI_INT, recv.sendarray, 1, MPI_INT, MPI_COMM_WORLD);
    MPI_Allgather (&(fd.array2[rank]), 1, MPI_CHAR, recv.array2, 1, MPI_CHAR, MPI_COMM_WORLD);

    if (rank == 0) {
        printf("Received:\n");
        printf("---\n");
        printf("int array: ");
        for (j = 0; j < slots; j++) {
            printf("%3d ", recv.sendarray[j]);
        }
        printf("\nint: "); printf("%3d\n", recv.a);
        printf("char array: ");
        for (j = 0; j < slots; j++) {
            printf("%3d ", (int)(recv.array2[j]));
        }
        printf("\n");
    }

    MPI_Finalize();
    return 0;
}
If we run this with 4 processes, we get:
My rank is 0, fd.sendarray[0] is 0
My rank is 1, fd.sendarray[1] is 10
My rank is 2, fd.sendarray[2] is 20
My rank is 3, fd.sendarray[3] is 30
Received:
---
int array: 0 10 20 30 999 999 999 999 999 999
int: 999
char array: 0 20 40 60 99 99 99 99 99 99