im trying to create a texture 3d from a part of a device array.
To do this, these are my steps:
- malloc Device Array
- Write Device Array
- Create CudaArray (3D)
- Bind Texture to CudaArray
The way im doing it it creates no compiler errors, but when i run cuda-memcheck it's failing when im trying to fetch data from the Texture.
Invalid global read of size 8 .. Address 0x10dfaf3a0 is out of bounds
Thats why i'm guessing i declared the texture Array wrong. here is how i access the texture:
tex3D(NoiseTextures[i],x,y,z)
The way im doing the steps mentioned above:
1.Malloc Device Array
cudaMalloc((void **)&d_Noise, sqrSizeNoise*nNoise*sizeof(float));
2.Write Device Array
curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen,Seed);
curandGenerateUniform(gen, d_Noise, sqrSizeNoise*nNoise);
curandDestroyGenerator(gen);
3+4.Creating the Cuda Array and binding it to the texture (Im guessing the mistake is here)
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();//cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray *d_cuArr;
cudaMalloc3DArray(&d_cuArr, &channelDesc, make_cudaExtent(SizeNoise,SizeNoise,SizeNoise), 0);
cudaMemcpy3DParms copyParams = {0};
//Loop for every separated Noise Texture (nNoise = 4)
for(int i = 0; i < nNoise; i++){
//initialize the textures
NoiseTextures[i] = texture<float, 3, cudaReadModeElementType>(1,cudaFilterModeLinear,cudaAddressModeWrap,channelDesc);
//Array creation
//+(sqrSizeNoise*i) is to separate the created Noise Array into smaller parts with the size of SizeNoise^3
copyParams.srcPtr = make_cudaPitchedPtr(d_Noise+(sqrSizeNoise*i), SizeNoise*sizeof(float), SizeNoise, SizeNoise);
copyParams.dstArray = d_cuArr;
copyParams.extent = make_cudaExtent(SizeNoise,SizeNoise,SizeNoise);
copyParams.kind = cudaMemcpyDeviceToDevice;
checkCudaErrors(cudaMemcpy3D(©Params));
//Array creation End
//new Bind
// set texture parameters
NoiseTextures[i].normalized = true; // access with normalized texture coordinates
NoiseTextures[i].filterMode = cudaFilterModeLinear; // linear interpolation
NoiseTextures[i].addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates
NoiseTextures[i].addressMode[1] = cudaAddressModeWrap;
NoiseTextures[i].addressMode[2] = cudaAddressModeWrap;
// bind array to 3D texture
checkCudaErrors(cudaBindTextureToArray(NoiseTextures[i], d_cuArr, channelDesc));
//end Bind
}
cudaFreeArray(d_cuArr);
I've Pasted this code snippet to Pastebin so its easier to look at with colors etc. http://pastebin.com/SM3dYd38
I hope I clearly described my problem. If not pls comment!
Can you help me with this? Thanks for reading,
Cery
Edit: Here is a complete code so you can try it on your own machine:
#include <helper_cuda.h>
#include <helper_functions.h>
#include <helper_cuda_gl.h>
#include <texture_types.h>
#include <cuda_runtime.h>
#include <curand.h>
static texture<float, 3, cudaReadModeElementType> NoiseTextures[4];//texture Array
float *d_NoiseTest;//Device Array with random floats
int SizeNoiseTest = 32;
int sqrSizeNoiseTest = 32768;
void CreateTexture();
__global__ void AccesTexture(texture<float, 3, cudaReadModeElementType>* NoiseTextures)
{
int test = tex3D(NoiseTextures[0],threadIdx.x,threadIdx.y,threadIdx.z);//by using this the error occurs
}
int
main(int argc, char **argv)
{
CreateTexture();
}
void CreateTexture()
{
//curand Random Generator (needs compiler link -lcurand)
curandGenerator_t gen;
cudaMalloc((void **)&d_NoiseTest, sqrSizeNoiseTest*4*sizeof(float));//Allocation of device Array
curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen,1234ULL);
curandGenerateUniform(gen, d_NoiseTest, sqrSizeNoiseTest*4);//writing data to d_NoiseTest
curandDestroyGenerator(gen);
//cudaArray Descriptor
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
//cuda Array
cudaArray *d_cuArr;
cudaMalloc3DArray(&d_cuArr, &channelDesc, make_cudaExtent(SizeNoiseTest*sizeof(float),SizeNoiseTest,SizeNoiseTest), 0);
cudaMemcpy3DParms copyParams = {0};
//Loop for every separated Noise Texture (4 = 4)
for(int i = 0; i < 4; i++){
//initialize the textures
NoiseTextures[i] = texture<float, 3, cudaReadModeElementType>(1,cudaFilterModeLinear,cudaAddressModeWrap,channelDesc);
//Array creation
//+(sqrSizeNoise*i) is to separate the created Noise Array into smaller parts with the size of SizeNoise^3
copyParams.srcPtr = make_cudaPitchedPtr(d_NoiseTest+(sqrSizeNoiseTest*i), SizeNoiseTest*sizeof(float), SizeNoiseTest, SizeNoiseTest);
copyParams.dstArray = d_cuArr;
copyParams.extent = make_cudaExtent(SizeNoiseTest*sizeof(float),SizeNoiseTest,SizeNoiseTest);
copyParams.kind = cudaMemcpyDeviceToDevice;
checkCudaErrors(cudaMemcpy3D(©Params));
//Array creation End
//new Bind
// set texture parameters
NoiseTextures[i].normalized = true; // access with normalized texture coordinates
NoiseTextures[i].filterMode = cudaFilterModeLinear; // linear interpolation
NoiseTextures[i].addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates
NoiseTextures[i].addressMode[1] = cudaAddressModeWrap;
NoiseTextures[i].addressMode[2] = cudaAddressModeWrap;
// bind array to 3D texture
checkCudaErrors(cudaBindTextureToArray(NoiseTextures[i], d_cuArr, channelDesc));
//end Bind
}
cudaFreeArray(d_cuArr);
AccesTexture<<<1,dim3(4,4,4)>>>(NoiseTextures);
}
You need to link -lcurand though. And include CUDA-6.0/samples/common/inc
Im now getting a different error in this code
code=11(cudaErrorInvalidValue) "cudaMemcpy3D(©Params)"
Even though it's the exact same code then my original. - Im starting to get completely confused. Thank you for your help
make_cudaExtent(SizeNoise*sizeof(float),SizeNoise,SizeNoise);- Kamil Czerskidim3(32,32,32)is not a valid threadblock configuration for any current CUDA GPU. You could not possibly be running that kernel, regardless of any preceding code. and we don't pass a texture reference as a parameter to a kernel. It is a static entity. You just use it. Probably best if you study basic texturing first. - Robert CrovellacudaMemcpy3Doperation. Refer to the documentation - Robert Crovella