Continuing on my CUDA beginner's adventure, I've been introduced to Thrust, which seems a convenient lib that saves me the hassle of explicit memory (de-)allocation.
I've already tried combining it with a few cuBLAS routines, e.g. gemv, by generating a raw pointer to the underlying storage with thrust::raw_pointer_cast(array.data()) and then feeding this to the routines, and it works just fine.
The current task is to get the inverse of a matrix, and for that I'm using getrfBatched and getriBatched. From the documentation:
cublasStatus_t cublasDgetrfBatched(cublasHandle_t handle,
int n,
double *Aarray[],
int lda,
int *PivotArray,
int *infoArray,
int batchSize);
where
Aarray - device - array of pointers to <type> array
Naturally I thought I could use another layer of Thrust vector to express this array of pointers and again feed its raw pointer to cuBLAS, so here's what I did:
void test()
{
thrust::device_vector<double> in(4);
in[0] = 1;
in[1] = 3;
in[2] = 2;
in[3] = 4;
cublasStatus_t stat;
cublasHandle_t handle;
stat = cublasCreate(&handle);
thrust::device_vector<double> out(4, 0);
thrust::device_vector<int> pivot(2, 0);
int info = 0;
thrust::device_vector<double*> in_array(1);
in_array[0] = thrust::raw_pointer_cast(in.data());
thrust::device_vector<double*> out_array(1);
out_array[0] = thrust::raw_pointer_cast(out.data());
stat = cublasDgetrfBatched(handle, 2,
(double**)thrust::raw_pointer_cast(in_array.data()), 2,
thrust::raw_pointer_cast(pivot.data()), &info, 1);
stat = cublasDgetriBatched(handle, 2,
(const double**)thrust::raw_pointer_cast(in_array.data()), 2,
thrust::raw_pointer_cast(pivot.data()),
(double**)thrust::raw_pointer_cast(out_array.data()), 2, &info, 1);
}
When executed, stat says CUBLAS_STATUS_SUCCESS (0) and info says 0 (execution successful), yet if I try to access the elements of in, pivot or out with standard bracket notation, I hit a thrust::system::system_error. Seems to me that the corresponding memory got corrupted somehow.
Anything obvious that I'm missing here?