To be more clear, what I want is passing the pointers and all the data they pointed to device. To test how I can achieve this goal, I wrote a simple class:
class vecarray{
public:
int * vecptr[N]; //array of pointers pointing to array
int dim[N]; //store length of each array pointed to
__device__ __host__ vecarray(); //constructor
__device__ __host__ int sum(); //sum up all the elements in the array being
//pointed to
}
vecarray::vecarray(){
for(int i = 0; i<N; i++)
{
vecptr[i] = NULL;
dim[i] = 0;
}
}
int vecarray::sum(){
int i=0, j=0, s=0;
for (i=0; i<N; i++)
for(j=0; j < dim[i]; j++)
s += vecptr[i][j];
return s;
}
Then I use this class in the following code:
#define N 2
__global__ void addvecarray( vecarray * v, int *s){
*s = v->sum();
}
int main(){ //copy *V to device, do sum() and pass back
vecarray *v, *dev_v; //the result by dev_v
v = new vecarray;
dev_v = new vecarray;
int a[3] = {1,2,3}; //initialize v manually
int b[4] = {4,5,6,7};
int result = 0;
int * dev_result;
v->vecptr[0] = a;
v->vecptr[1] = b;
v->dim[0] = 3; v->dim[1] = 4;
cudaMalloc((void**)&dev_v, sizeof(vecarray));
cudaMemcpy(dev_v, v, sizeof(vecarray),cudaMemcpyHostToDevice); //copy class object
for(int i = 0; i < N; i++){
cudaMalloc((void**)&(dev_v->vecptr[i]), v->dim[i]*sizeof(int));
}
for(int i = 0; i<N; i++ ){ //copy arrays
cudaMemcpy(dev_v->vecptr[i], v->vecptr[i], v->dim[i]*sizeof(int), cudaMemcpyHostToDevice));
}
addvecarray<<<1,1>>>(dev_v, dev_result);
cudaMemcpy(&result, dev_result, sizeof(int), cudaMemcpyDeviceToHost);
printf("the result is %d\n", result);
}
The code passed nvcc compiler, but failed with segmentation fault when running. I've checked the problem lies in the two cudaMalloc and cudaMemcpy opertation in the for-loop. So my question is how should I pass this object to CUDA? Thanks in advance.
dev_v
. – Robert Crovella