I have found that the CUDA atomicOr function is not recognized in my Thrust program, which is compiled in Visual Studio 2012.
I have read that all the necessary header files should already be included when the NVIDIA nvcc compiler is invoked. Most postings on this issue state that an "undefined" atomic function must mean the target architecture settings are incorrect.
I have tried the settings suggested in another posting: How to set CUDA compiler flags in Visual Studio 2010?
...as well as the settings shown in this screenshot: http://s1240.photobucket.com/user/fireshot8888/media/cuda_settings.png.html
main.cpp:
#include <thrust/device_vector.h>
#include <cstdlib>
#include <iostream>
#include "cuda.h"
using namespace std;
//Visual C++ compiled main function to launch the GPU calling code
//
// Builds a small hand-keyed data set on the host and hands it to
// doHistogramGPU(), which performs the binning on the device.
// Always returns 0.
int main(int argc, char *argv[])
{
    // The command-line arguments are not used by this example.
    (void)argc;
    (void)argv;

    const int numVars = 10;
    int numBins = 4;
    int rowCount = 2;

    //Just some random data hand keyed to make it a complete example for stack overflow while not being too complicated
    // (float literals, so no double -> float truncation happens in the initializer)
    float data[] = {1.2f, 3.4f, 3.4f, 3.3f, 4.4f, 4.4f, 4.4f, 3.4f, 4.4f, 4.4f,
                    1.2f, 3.4f, 3.4f, 3.3f, 4.4f, 4.4f, 4.4f, 3.4f, 4.4f, 4.4f};

    // Derive the element count from the array itself so that editing the
    // table above cannot leave a stale hard-coded length behind.
    const size_t dataCount = sizeof(data) / sizeof(data[0]);

    // Host-side copy of the values that is passed (by value) to the GPU entry point.
    thrust::host_vector<float> h_data(data, data + dataCount);

    doHistogramGPU(numVars, h_data, numBins, rowCount);
    return 0;
}
cuda.cu:
#include "cuda.h"
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
//I GAVE THIS A TRY BUT IT DID NOT FIX MY ISSUE::::
#include <cuda_runtime.h>
#include <cuda.h>
using namespace std;
//Function to call the kernel
//
// Copies h_buffer to the device, allocates one packed flag vector per bin and
// runs BinFinder over every (value, element index) pair via thrust::for_each.
//
// Parameters:
//   numVars    - number of variables (columns) per record; element i is looked
//                up in the range tables at index i % numVars, so it must be in
//                [1, number of entries in the tables below].
//   h_buffer   - host values to bin (taken by value, as declared in cuda.h).
//   numBins    - number of bins; one flag vector is allocated per bin.
//   numRecords - number of records; together with numVars it sizes each flag vector.
//
// Invalid arguments are reported on std::cerr and the function returns without
// touching the device. The flag vectors live in a local device_vector and are
// released when the function returns.
void doHistogramGPU(int numVars, thrust::host_vector<float> h_buffer, int numBins, int numRecords)
{
    // Per-column value ranges used to normalise each value before binning.
    const float minValues[] = {579.8f, 72.16f, 0.000385f, 7.576e-005f, 6.954e-005f, 0.0f, 0.0f, 2.602e-012f, 1.946e-013f, 7.393e-015f};
    const float maxValues[] = {1053.0f, 22150.0f, 0.7599f, 0.7596f, 0.24f, 0.2398f, 0.1623f, 1.167e-007f, 4.518e-006f, 5.322e-008f};
    const int numMinValues = sizeof(minValues) / sizeof(minValues[0]);
    const int numMaxValues = sizeof(maxValues) / sizeof(maxValues[0]);

    // The device code indexes both tables with (index % numVars): a numVars
    // larger than either table reads past its end, and numVars == 0 is a modulo by zero.
    if (numVars <= 0 || numVars > numMinValues || numVars > numMaxValues)
    {
        std::cerr << "doHistogramGPU: numVars must be between 1 and the number of min/max table entries" << std::endl;
        return;
    }
    if (numBins <= 0 || numRecords < 0)
    {
        std::cerr << "doHistogramGPU: numBins must be positive and numRecords must not be negative" << std::endl;
        return;
    }
    // Each flag vector is sized from numRecords * numVars; more input elements
    // than that would make the device write beyond the allocation.
    if (h_buffer.size() > static_cast<size_t>(numRecords) * static_cast<size_t>(numVars))
    {
        std::cerr << "doHistogramGPU: h_buffer holds more than numRecords * numVars values" << std::endl;
        return;
    }

    // NOTE(review): dataSize is a byte count (sizeof), yet it is used as the
    // number of flags stored per word. BinFinder::operator() uses the very same
    // formula, so the layout is consistent, but most bits of every word stay
    // unused. If full-width packing was intended, change both places together.
    const int dataSize = sizeof(BYTE_UNIT);
    const int bitVectorSize = static_cast<int>(ceil(numRecords * numVars / (float)dataSize));

    // Input values and the (zero-initialised) per-bin flag vectors on the device.
    thrust::device_vector<float> d_buffer(h_buffer.begin(), h_buffer.end());
    thrust::device_vector<BYTE_UNIT> d_bitData(bitVectorSize * numBins);

    // Pair every value with its running index so the functor knows which
    // column and which flag position it is handling.
    thrust::counting_iterator<int> counter(0);
    auto zipInFirst = thrust::make_zip_iterator(thrust::make_tuple(d_buffer.begin(), counter));
    auto zipInLast = thrust::make_zip_iterator(thrust::make_tuple(d_buffer.end(), counter + d_buffer.size()));

    //Get things loaded onto the device then call the kernel
    thrust::device_vector<float> d_minValues(minValues, minValues + numMinValues);
    thrust::device_vector<float> d_maxValues(maxValues, maxValues + numMaxValues);

    // data() is valid even for an empty vector, unlike taking the address of element 0.
    BYTE_UNIT * rawBitData = thrust::raw_pointer_cast(d_bitData.data());
    float * rawMinValues = thrust::raw_pointer_cast(d_minValues.data());
    float * rawMaxValues = thrust::raw_pointer_cast(d_maxValues.data());

    //Invoke the Thrust Kernel
    thrust::for_each(zipInFirst, zipInLast, BinFinder(rawBitData, rawMinValues, rawMaxValues, numVars, numBins, numRecords));

    cout << endl;
}
cuda.h:
#ifndef CUDA_H
#define CUDA_H
#include <thrust/device_vector.h>
#include <iostream>
//I tried these here, too...
#include <cuda_runtime.h>
#include <cuda.h>
using namespace std;
// Storage word for the packed bin flags. It has to be a 32-bit type that CUDA's
// atomicOr() is overloaded for (int, unsigned int, unsigned long long). `long`
// has no atomicOr overload and is 64 bits wide on LP64 platforms, so an
// unsigned 32-bit word is used instead.
typedef unsigned int BYTE_UNIT; //32 bit storage
// GPU entry point, defined in cuda.cu and called from the host-compiled main().
// numvars    - number of variables (columns) per record
// h_buffer   - host values to process; taken by value and copied to the device
// numBins    - number of bins
// numRecords - number of records
void doHistogramGPU(int numvars, thrust::host_vector<float> h_buffer, int numBins, int numRecords);
// Thrust functor that assigns each input value to a bin and records the
// assignment by setting one flag bit in that bin's packed flag vector.
//
// It is applied with thrust::for_each to a zip of (value, element index). The
// pointers it stores are raw device pointers supplied by the caller; the
// functor does not own them.
//
// Build requirement: atomicOr() on global memory needs compute capability 1.1
// or higher. When nvcc falls back to sm_10 (no -arch/-gencode option given) the
// function is not declared at all and the compiler reports
// 'identifier "atomicOr" is undefined'. Compile with e.g. -arch=sm_20 or newer.
struct BinFinder
{
    // atomicOr() is only overloaded for int, unsigned int and unsigned long long,
    // so the flag words are updated through an unsigned int view. That is only
    // valid when BYTE_UNIT is exactly as wide as unsigned int; a negative array
    // size makes the build fail here if it is not.
    typedef char BYTE_UNIT_must_be_32_bits[sizeof(BYTE_UNIT) == sizeof(unsigned int) ? 1 : -1];

    BYTE_UNIT * data;       // device memory: numBins flag vectors stored back to back
    float * rawMinVector;   // device memory: per-column minimum, indexed by (id % numVars)
    float * rawMaxVector;   // device memory: per-column maximum, indexed by (id % numVars)
    int numVars;            // number of variables (columns) per record
    int numBins;            // number of bins
    int numRecords;         // number of records

    // Stores the raw device pointers and dimensions; nothing is copied or validated.
    BinFinder(BYTE_UNIT * data, float * rawMinVector, float * rawMaxVector, int numVars, int numBins, int numRecords)
        : data(data),
          rawMinVector(rawMinVector),
          rawMaxVector(rawMaxVector),
          numVars(numVars),
          numBins(numBins),
          numRecords(numRecords)
    {
    }

    //This kernel converts the multidimensional bin representation to a single dimensional representation
    //
    // param is a (value, index) tuple: element 0 is the value to bin and
    // element 1 is its position in the flattened input.
    template <typename Tuple>
    __device__ void operator()( Tuple param )
    {
        // NOTE(review): dataSize is a byte count (sizeof) but is used as the
        // number of flags per word, so only the low dataSize bits of each word
        // are ever set. The host code that sizes the buffer uses the same
        // formula; change both together if full-width packing is wanted.
        const int dataSize = sizeof(BYTE_UNIT);
        const int shiftSize = dataSize - 1;
        const int bitVectorSize = ceil(numRecords * numVars / float(dataSize));

        const float value = thrust::get<0>(param);
        const int id = thrust::get<1>(param);

        //Look up the min and max values for this data column using the index
        const float min = rawMinVector[id % numVars];
        const float max = rawMaxVector[id % numVars];

        //Calculate the bin id
        // The bin is clamped to [0, numBins - 1]: a value outside [min, max]
        // would otherwise give a negative or too-large bin and an out-of-bounds
        // write. The range test is done in floating point because converting an
        // out-of-range float to an integer is undefined. A degenerate column
        // (max == min) and NaN inputs end up in bin 0.
        int bin = 0;
        if (max != min)
        {
            const float percentage = (value - min) / (max - min);
            if (percentage >= 1.0f)
            {
                bin = numBins - 1;
            }
            else if (percentage > 0.0f)
            {
                bin = int(percentage * numBins);
            }
        }
        if (bin >= numBins)
        {
            // percentage * numBins can round up to numBins just below 1.0f
            bin = numBins - 1;
        }

        //////////////////////////////////////////////////////////////
        //Set a 1 in the appropriate bitvector for the calculated bin
        //////////////////////////////////////////////////////////////
        // Several elements share one storage word, so a plain read-modify-write
        //   data[...] |= (1 << ...);
        // races between threads. The update has to be atomic.
        unsigned int * word = reinterpret_cast<unsigned int *>(data + (bin * bitVectorSize + id / dataSize));
        atomicOr(word, 1u << (shiftSize - id % dataSize));
    }
};
#endif
nvcc command for cuda.cu (which includes my cuda.h file):
"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/bin/nvcc.exe" "C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu" -c -o "C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/build/CMakeFiles/CudaLib.dir//Debug/CudaLib_generated_cuda.cu.obj" -ccbin "C:/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin" -m64 -Xcompiler ,\"/DWIN32\",\"/D_WINDOWS\",\"/W3\",\"/GR\",\"/EHsc\",\"/D_DEBUG\",\"/MDd\",\"/Zi\",\"/Ob0\",\"/Od\",\"/RTC1\" -DNVCC "-IC:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/include" "-IC:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/include"
Full error output by nvcc:
1>nvcc : warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(107): warning : variable "minValues" was declared but never referenced
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(108): warning : variable "maxValues" was declared but never referenced
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(462): warning : variable "shiftSize" was declared but never referenced
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(602): warning : initial value of reference to non-const must be an lvalue
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(618): warning : dynamic initialization in unreachable code
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(522): warning : variable "shiftSize" was declared but never referenced
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(975): warning : initial value of reference to non-const must be an lvalue
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(993): warning : initial value of reference to non-const must be an lvalue
1>
1>C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(1022): warning : variable "shiftSize" was declared but never referenced
1>
1>c:\users\datahead8888\documents\visual studio 2012\projects\thrust-space-data\src\cuda.h(188): error : identifier "atomicOr" is undefined
1> detected during:
1> instantiation of "void BinFinder::operator()(Tuple) [with Tuple=thrust::detail::tuple_of_iterator_references]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/detail/function.h(119): here
1> instantiation of "Result thrust::detail::device_function::operator()(const Argument &) const [with Function=BinFinder, Result=void, Argument=thrust::detail::tuple_of_iterator_references, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/for_each.inl(82): here
1> instantiation of "thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure::result_type thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure::operator()() [with RandomAccessIterator=thrust::zip_iterator>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Size=unsigned int, UnaryFunction=BinFinder, Context=thrust::system::cuda::detail::detail::blocked_thread_array]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/detail/launch_closure.inl(49): here
1> instantiation of "void thrust::system::cuda::detail::detail::launch_closure_by_value(Closure) [with Closure=thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, unsigned int, BinFinder, thrust::system::cuda::detail::detail::blocked_thread_array>]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/detail/launch_closure.inl(77): here
1> instantiation of "thrust::system::cuda::detail::detail::closure_launcher_base::launch_function_t thrust::system::cuda::detail::detail::closure_launcher_base::get_launch_function() [with Closure=thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, unsigned int, BinFinder, thrust::system::cuda::detail::detail::blocked_thread_array>, launch_by_value=true]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/detail/launch_closure.inl(185): here
1> [ 2 instantiation contexts not shown ]
1> instantiation of "thrust::tuple thrust::system::cuda::detail::for_each_n_detail::configure_launch(Size) [with Closure=thrust::system::cuda::detail::for_each_n_detail::for_each_n_closure>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, unsigned int, BinFinder, thrust::system::cuda::detail::detail::blocked_thread_array>, Size=long long]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/for_each.inl(163): here
1> instantiation of "RandomAccessIterator thrust::system::cuda::detail::for_each_n(thrust::system::cuda::detail::execution_policy &, RandomAccessIterator, Size, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, RandomAccessIterator=thrust::zip_iterator>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Size=long long, UnaryFunction=BinFinder]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/system/cuda/detail/for_each.inl(191): here
1> instantiation of "RandomAccessIterator thrust::system::cuda::detail::for_each(thrust::system::cuda::detail::execution_policy &, RandomAccessIterator, RandomAccessIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, RandomAccessIterator=thrust::zip_iterator>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=BinFinder]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/detail/for_each.inl(43): here
1> instantiation of "InputIterator thrust::for_each(const thrust::detail::execution_policy_base &, InputIterator, InputIterator, UnaryFunction) [with DerivedPolicy=thrust::system::cuda::detail::tag, InputIterator=thrust::zip_iterator>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=BinFinder]"
1> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include\thrust/detail/for_each.inl(57): here
1> instantiation of "InputIterator thrust::for_each(InputIterator, InputIterator, UnaryFunction) [with InputIterator=thrust::zip_iterator>, thrust::counting_iterator, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, UnaryFunction=BinFinder]"
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(597): here
1>
1> 1 error detected in the compilation of "C:/Users/DATAHE~1/AppData/Local/Temp/tmpxft_00001f78_00000000-8_cuda.cpp1.ii".
1> cuda.cu
atomicOr on a byte pointer: there is no such function provided by CUDA. (I can't tell what BYTE_UNIT is, since you've not provided complete code.) – Robert Crovella