I am a beginner in OpenCL, so I am writing a simple program that doubles the elements of an array. The kernel code is:
/*
 * dataParallel - write twice the value of each element of A into B.
 *
 * A: input buffer in global memory.
 * B: output buffer in global memory; B[k] receives A[k] + A[k].
 *
 * Each work-item processes the element at its global index. The global
 * index is used rather than the local one because local ids restart at 0
 * in every work-group, which would make several groups touch the same
 * elements as soon as the NDRange spans more than one work-group.
 */
__kernel void dataParallel(__global int* A, __global int* B)
{
    size_t gid = get_global_id(0);
    B[gid] = A[gid] + A[gid];
}
The local_work_size is 32 because I am doubling 32 elements.
In my program I have declared an integer array which holds the elements to be doubled.
int *A;
A=(int*)malloc(sizeof(int)*64);
for (i=0; i < 32; i++) { A[i] = i; }
platforms[i] stores the platform id and devices[j] stores the corresponding device id. Their types are:
/* Platform and device id lists; indexed as platforms[i] and devices[j]. */
cl_platform_id *platforms;
cl_device_id *devices;
Creating context
cl_context context=clCreateContext(NULL,1,&devices[j],NULL,NULL,NULL);
Next comes the command queue
/* Command queue on the selected device. The third argument is a
 * cl_command_queue_properties bit-field, so "no properties" is spelled 0
 * rather than NULL. */
cl_command_queue cmdqueue = clCreateCommandQueue(context, devices[j], 0, &err);
Next I created 2 memory buffers, one to hold the input data and the other to hold the result.
/* Device buffers of 32 ints each: Abuffer holds the input, Bbuffer the result. */
cl_mem Abuffer;
cl_mem Bbuffer;
Abuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int) * 32, NULL, &err);
Bbuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int) * 32, NULL, &err);
Then I copied the data of array A to Abuffer
/* Blocking write (CL_TRUE): copy 32 ints from host array A into Abuffer,
 * starting at offset 0. */
ret = clEnqueueWriteBuffer(cmdqueue,
                           Abuffer,
                           CL_TRUE,          /* blocking write */
                           0,                /* offset into Abuffer */
                           sizeof(int) * 32, /* bytes to copy */
                           A,
                           0, NULL, NULL);   /* no wait list, no event */
printf("%d", ret); /* prints 0, i.e. the data was written into the buffer */
The kernel code was then read into a character string source_str
and the program was created.
/* Create the program object from the single source string in source_str,
 * whose length is given by source_size. */
kernelprgrm = clCreateProgramWithSource(context,
                                        1,
                                        (const char **)&source_str,
                                        (const size_t *)&source_size,
                                        &err);
if (err == 0)
{
    printf("\nKernel program created successfully\n");
} /* Outputs - Kernel program created successfully */
I then built the program using:
/* Build the program for the one selected device. */
ret = clBuildProgram(kernelprgrm,
                     1, &devices[j], /* device list of length 1 */
                     NULL,           /* no build options */
                     NULL, NULL);    /* no notification callback */
/* returns CL_SUCCESS */
Getting buildinfo next
/* Size query only: no value buffer is supplied, so the call just stores in
 * size the number of bytes the CL_PROGRAM_BUILD_STATUS value occupies. */
ret = clGetProgramBuildInfo(kernelprgrm, devices[j],
                            CL_PROGRAM_BUILD_STATUS,
                            0, NULL, &size);
/* Returns success */
Creating kernel
/* Obtain the kernel object for the "dataParallel" entry point of the built
 * program; the status code is stored in ret. */
kernel = clCreateKernel(kernelprgrm, "dataParallel", &ret);
printf("\nReturn kernel program=%d", ret);
if (ret == 0)
{
    printf("\nProgram created successfully!\n");
}
/* Outputs - Program created successfully! */
Now comes the devil:-
// Bind Abuffer to kernel argument 0 and Bbuffer to kernel argument 1,
// printing the status code of each call.
// NOTE(review): the cl_mem handles themselves are cast to void * and passed
// as the argument value here. This is the form that produces -38; passing
// the address of each handle (&Abuffer, &Bbuffer) makes both calls return 0.
ret=clSetKernelArg(kernel,0,sizeof(cl_mem),(void *) Abuffer);
printf("\nKernel argument 1 ret=%d",ret);
ret=clSetKernelArg(kernel,1,sizeof(cl_mem),(void *) Bbuffer);
printf("\nKernel argument 2 ret=%d",ret);
Both return -38, meaning CL_INVALID_MEM_OBJECT.
P.S.: As was pointed out, the argument should be &Abuffer instead of Abuffer (and likewise &Bbuffer); after making that change, both calls return 0.
// Work sizes passed to clEnqueueNDRangeKernel: 32 work-items in total, with a
// work-group size of 32, so the whole range forms a single work-group.
size_t global_item_size = 32;
size_t local_item_size = 32;
Enqueueing the kernel also succeeds:
ret = clEnqueueNDRangeKernel(cmdqueue, kernel, 1, NULL,&global_item_size, &local_item_size, 0, NULL, NULL);
returns 0.
Trying to get the result
ret = clEnqueueReadBuffer(cmdqueue, Bbuffer, CL_TRUE, 0, 32*sizeof(int), B, 0, NULL, NULL);`
printf("\nB:-\n");
for (t=0; t < 32; t++) {
printf("%d\t ", B[t]);
}
With this, the build status is 0, yet the program dumps core on my AMD GPU (running the AMD Accelerated Parallel Processing platform) and on my NVIDIA GPU, whereas it works perfectly fine if the selected device is the CPU using the Intel(R) OpenCL platform.
Also I tried getting the build log using:
cl_build_status *status=(cl_build_status *)malloc(sizeof(cl_build_status )*size);
clGetProgramBuildInfo(kernelprgrm,devices[j], CL_PROGRAM_BUILD_STATUS ,size,status,NULL);
printf("\nBuild status=%d\n",*status);
//Getting build info if not successful
clGetProgramBuildInfo(kernelprgrm,devices[i], CL_PROGRAM_BUILD_LOG ,0,NULL,&size);
char *buildlog=(char*)malloc(size);
clGetProgramBuildInfo(kernelprgrm,devices[i], CL_PROGRAM_BUILD_LOG ,size,buildlog,NULL);
printf("\n!!!!!!!!!!!!!!!!!!!!!Program ended!!!!!!!!!!!\n");
printf("\n\nBuildlog: %s\n\n",buildlog);
But it returns Buildlog: Compilation started
Compilation done
Linking started
Linking done
Device build started
Device build done
Kernel <dataParallel> was successfully vectorized (4)
Done.