1
votes

I am a beginner in OpenCL, so I am writing a simple program to double the elements of an array. The kernel code is:

/*
 * Doubles every element of A into B: B[i] = A[i] + A[i].
 * Each work-item handles exactly one element.
 *
 * A: input buffer, one int per work-item in dimension 0.
 * B: output buffer, same length as A.
 *
 * The element index is the work-item's global ID, so every element is
 * processed no matter how the range is divided into work-groups. A local ID
 * restarts at 0 in each work-group and only matches the element index when
 * the whole range is a single work-group.
 */
__kernel void dataParallel(__global int* A, __global int* B)
{
    int gid = get_global_id(0);
    B[gid] = A[gid] + A[gid];
}

The local_work_size is 32, as I am doubling 32 elements.

In my program I have declared an integer array which holds the elements to be doubled.

int *A;
A=(int*)malloc(sizeof(int)*64);
for (i=0; i < 32; i++) {            A[i] = i;   }

platforms[i] stores the platform id, devices[j] stores the corresponding device id. Their types:

cl_platform_id* platforms;
cl_device_id* devices;

Creating context

cl_context context=clCreateContext(NULL,1,&devices[j],NULL,NULL,NULL);

Next comes the command queue

/* Create the command queue for the selected device. The properties argument
   is a bit-field, so "no properties" is 0 rather than NULL; the status code
   is stored in err. */
cl_command_queue cmdqueue=clCreateCommandQueue(context,devices[j],0,&err);

Next I created 2 memory buffers, one to hold the input data and the other to hold the result.

cl_mem  Abuffer,Bbuffer;
Abuffer=clCreateBuffer(context, CL_MEM_READ_WRITE ,32*sizeof(int),NULL,&err);
Bbuffer=clCreateBuffer(context, CL_MEM_READ_WRITE ,32*sizeof(int),NULL,&err);

Then I copied the data of array A to Abuffer

ret=clEnqueueWriteBuffer(cmdqueue, Abuffer, CL_TRUE, 0, 32*sizeof(int), A, 0, NULL, NULL);
printf("%d",ret);//output is 0 thus data written successfully into the buffer

The kernel code was then read into a character string source_str and the program was created.

kernelprgrm=clCreateProgramWithSource(context,1,(const char **)&source_str,(const size_t *)&source_size,&err);
    if(!err)
    {
        printf("\nKernel program created successfully\n");
    }//Outputs -Kernel program created successfully

I then built the program using:

ret=clBuildProgram(kernelprgrm,1,&devices[j],NULL,NULL,NULL);//returns CL_SUCCESS

Getting buildinfo next

ret=clGetProgramBuildInfo(kernelprgrm,devices[j], CL_PROGRAM_BUILD_STATUS ,0,NULL,&size);//Returns success

Creating kernel

kernel = clCreateKernel(kernelprgrm, "dataParallel", &ret);
    printf("\nReturn kernel program=%d",ret);
    if(!ret)
    {
        printf("\nProgram created successfully!\n");
    }
    //Outputs -Program created successfully!

Now comes the devil:-

ret=clSetKernelArg(kernel,0,sizeof(cl_mem),(void *) Abuffer);
printf("\nKernel argument 1 ret=%d",ret);

ret=clSetKernelArg(kernel,1,sizeof(cl_mem),(void *) Bbuffer);
printf("\nKernel argument 2 ret=%d",ret);

Both return -38 meaning CL_INVALID_MEM_OBJECT.

P.S.: As pointed out in the answers, I changed the argument from Abuffer to &Abuffer; after making that change, both calls return 0.

size_t global_item_size = 32;
size_t local_item_size = 32;

Also ret = clEnqueueNDRangeKernel(cmdqueue, kernel, 1, NULL,&global_item_size, &local_item_size, 0, NULL, NULL); returns 0.

Trying to get the result

ret = clEnqueueReadBuffer(cmdqueue, Bbuffer, CL_TRUE, 0, 32*sizeof(int), B, 0, NULL, NULL);
printf("\nB:-\n");
for (t=0; t < 32; t++) {
            printf("%d\t ", B[t]);
                }

This reports buildstatus=0, but the program crashes with a core dump on my AMD GPU (running the AMD Accelerated Parallel Processing platform) and on my NVIDIA GPU, whereas it works perfectly fine if the selected device is the CPU using the Intel(R) OpenCL platform.

Also I tried getting the build log using:

cl_build_status *status=(cl_build_status *)malloc(sizeof(cl_build_status )*size);
clGetProgramBuildInfo(kernelprgrm,devices[j], CL_PROGRAM_BUILD_STATUS ,size,status,NULL);
printf("\nBuild status=%d\n",*status);

//Getting build info if not successful

clGetProgramBuildInfo(kernelprgrm,devices[i], CL_PROGRAM_BUILD_LOG ,0,NULL,&size);
char *buildlog=(char*)malloc(size);
clGetProgramBuildInfo(kernelprgrm,devices[i], CL_PROGRAM_BUILD_LOG ,size,buildlog,NULL);
printf("\n!!!!!!!!!!!!!!!!!!!!!Program ended!!!!!!!!!!!\n");
printf("\n\nBuildlog:   %s\n\n",buildlog);

But it returns Buildlog: Compilation started Compilation done Linking started Linking done Device build started Device build done Kernel <dataParallel> was successfully vectorized (4) Done.

2

2 Answers

2
votes

Here's what the OpenCL 1.2 spec has to say about setting buffers as kernel arguments:

If the argument is a memory object (buffer, image or image array), the arg_value entry will be a pointer to the appropriate buffer, image or image array object.

So, you need to pass a pointer to the cl_mem objects:

ret=clSetKernelArg(kernel,0,sizeof(cl_mem),(void *) &Abuffer);
0
votes

Why are you using clEnqueueTask? I think you should use clEnqueueNDRangeKernel if you have parallel work to do. Also, just set the global work size; pass NULL for the local work group size. 32x32 is larger than some devices can do.