
I have device variable and in this variable, I allocate and fill an array in the device, but I have a problem to get data to host. cudaMemcpy() return cudaErrorInvalidValue error. how can I do it?

PS: The Code is just example, I know, that In this particular case I can use cudaMalloc because I know the size of the array, but In my REAL code, It computes the size of the array in the device and it needs immediately allocate memory.

PS2: I found a similar problem, but I still don't know, how can I solve it? - copy data which is allocated in device from device to host

PS3: I have updated code, but still doesn't work:{

PS4: I am just trying to run this code on a notebook with Nvidia GT 520MX(latest game driver) and doesn't work too :(


#include <cuda.h>
#include <stdio.h>

#define N 400
__device__ int* d_array;

__global__ void allocDeviceMemory()
    d_array = new int[N];
    for(int i=0; i < N; i++)
         d_array[i] = 123;

int main()
    allocDeviceMemory<<<1, 1>>>();


    int* d_a = NULL;
    cudaMemcpyFromSymbol((void**)&d_a, "d_array", sizeof(d_a), 0, cudaMemcpyDeviceToHost);
    printf("gpu adress: %lld\n", d_a);

    int* h_array = (int*)malloc(N*sizeof(int));
    cudaError_t errr = cudaMemcpy(h_array, d_a, N*sizeof(int), cudaMemcpyDeviceToHost);
    printf("h_array: %d, %d\n", h_array[0], errr);

    return 0;

2 Answers


You need to synchronize (cudaDeviceSynchronize()) after launching the kernel to allocate the memory.

Can you also check the return value of the sync and all other CUDA API calls?


i have tested your code and there is no error here. I am running CUDA 4.0.