Sorting 3 arrays by key in CUDA (using Thrust perhaps)

Question

I have 3 arrays of the same size (more than 300.000 elements). One array of float numbers and two arrays of indices. So, for each number I have 2 IDs.

All 3 arrays are already in GPU global memory. I want to sort all the numbers together with their IDs accordingly.

Is there any way I can use Thrust library to do this task? Is there any better way than Thrust library?

Of course, I prefer not to copy them to and from host memory a couple of times. By the way, they're arrays not vectors.

Thanks for your help in advance.

Here is my tentative solution, but it is extremely slow: it takes almost 4 seconds, and my array size is on the order of 300,000 elements.

// Sort the float keys and reorder the two ID arrays to match, using an
// index permutation followed by two gathers.
// NOTE(review): how the ~4 s figure was measured is not shown here; confirm
// it excludes one-time costs such as CUDA context creation on first use.

// Wrap the raw arrays (stated to already reside in GPU global memory) so
// that Thrust algorithms can take them as device iterators.
thrust::device_ptr<float> keys(afterSum);
thrust::device_ptr<int> vals0(d_index);
thrust::device_ptr<int> vals1(blockId); 

// Device-side output buffers for the reordered ID arrays.
thrust::device_vector<int> sortedIndex(numElements);
thrust::device_vector<int> sortedBlockId(numElements);

// Build the identity permutation [0, 1, ..., numElements - 1].
thrust::counting_iterator<int> iter(0);
thrust::device_vector<int> indices(numElements);
thrust::copy(iter, iter + indices.size(), indices.begin()); 

// Sort the keys in place and carry the indices along; afterwards indices[i]
// holds the original position of the key that is now at position i.
thrust::sort_by_key(keys, keys + numElements , indices.begin());    

// Apply the sorted permutation to each ID array.
thrust::gather(indices.begin(), indices.end(), vals0, sortedIndex.begin());
thrust::gather(indices.begin(), indices.end(), vals1, sortedBlockId.begin());

// Copy the reordered IDs from device vectors into host vectors.
thrust::host_vector<int> h_sortedIndex=sortedIndex;
thrust::host_vector<int> h_sortedBlockId=sortedBlockId;

harrism harrism · Accepted Answer · 2011-07-08T00:43:24

Of course you can use Thrust. First, you need to wrap your raw CUDA device pointers with thrust::device_ptr. Assuming your float values are in the array pkeys, and the IDs are in the arrays pvals0 and pvals1, and numElements is the length of the arrays, something like this should work:

#include <cstdio>

#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sort.h>

// Sort the float keys in pkeys and reorder the two ID arrays (pvals0, pvals1)
// to match, entirely on the device. pkeys, pvals0 and pvals1 are raw device
// pointers and numElements is their common length. The whole sequence is
// timed with CUDA events; note that the measured interval also includes the
// device_vector allocations below.
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);

cudaEventRecord(start);

// Wrap the raw device pointers so Thrust dispatches to its device backend.
thrust::device_ptr<float> keys(pkeys);
thrust::device_ptr<int> vals0(pvals0);
thrust::device_ptr<int> vals1(pvals1);

// allocate space for the output
thrust::device_vector<int> sortedVals0(numElements);
thrust::device_vector<int> sortedVals1(numElements);

// initialize indices vector to [0,1,2,..]
thrust::counting_iterator<int> iter(0);
thrust::device_vector<int> indices(numElements);
thrust::copy(iter, iter + indices.size(), indices.begin());

// first sort the keys and indices by the keys.
// device_ptr is a pointer-like iterator, not a container: it has no
// begin()/end(), so the range is expressed as [keys, keys + numElements).
thrust::sort_by_key(keys, keys + numElements, indices.begin());

// Now reorder the ID arrays using the sorted indices; the device_ptr itself
// is the start of each source array.
thrust::gather(indices.begin(), indices.end(), vals0, sortedVals0.begin());
thrust::gather(indices.begin(), indices.end(), vals1, sortedVals1.begin());

cudaEventRecord(stop);
// Block until all work recorded before `stop` has finished, so the elapsed
// time is valid.
cudaEventSynchronize(stop);
float milliseconds = 0.0f;
cudaEventElapsedTime(&milliseconds, start, stop);
printf("Took %f milliseconds for %d elements\n", milliseconds, numElements);

// Release the timing events so they are not leaked.
cudaEventDestroy(start);
cudaEventDestroy(stop);

Sorting 3 arrays by key in CUDA (using Thrust perhaps)

3 Answers