I want to use Thrust library to calculate prefix sum of device array in CUDA.
My array is allocated with cudaMalloc(). My requirement is as follows:
main()
{
Launch kernel 1 on data allocated through cudaMalloc()
// This kernel will poplulate some data d.
Use thrust to calculate prefix sum of d.
Launch kernel 2 on prefix sum.
}
I want to use Thrust somewhere between my kernels so I need method to convert pointers to device iterators and back.What is wrong in following code?
int main()
{
int *a;
cudaMalloc((void**)&a,N*sizeof(int));
thrust::device_ptr<int> d=thrust::device_pointer_cast(a);
thrust::device_vector<int> v(N);
thrust::exclusive_scan(a,a+N,v);
return 0;
}