1
votes

I have been trying to find information on the (non-standard, cf. 1.0, 1.1, 1.2, 2.0) support for variadic macros in OpenCL implementations.

I have access to the following platforms, all of which support variadic macros:

  • Mac OS X, Intel CPU, OpenCL 1.2, driver: 1.1
  • Mac OS X, Intel GPU, OpenCL 1.2, driver: 1.2(Dec 23 2014 00:18:31)
  • Mac OS X, ATI GPU, OpenCL 1.2, driver: 1.2 (Aug 17 2014 20:27:52)
  • Mac OS X, Nvidia GPU, OpenCL 1.2, driver: 10.2.7 310.41.25f01

Could others please check their available implementations so that we can build a map of implementations that support variadic macros?

Edit: Here is a self-contained test program that makes use of a variadic macro.

#include <stdlib.h>
#include <stdio.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

/*
 * OpenCL C program text handed to clCreateProgramWithSource() in main().
 *
 * It defines a variadic macro, KERNEL(name, ...), that expands to a kernel
 * function signature whose parameter list is __VA_ARGS__, and then uses that
 * macro to declare the kernel "test". Each work-item of "test" copies
 * input[i] to output[i], where i is its global id in dimension 0.
 *
 * The program only builds if the OpenCL compiler accepts variadic macros,
 * which is the feature being probed.
 */
const char* SOURCE =
"#define KERNEL(name, ...) kernel void name(__VA_ARGS__) \n"
"                                                        \n"
"KERNEL(test, global float* input, global float* output) \n"
"{                                                       \n"
"    int i = get_global_id(0);                           \n"
"    output[i] = input[i];                               \n"
"}                                                       \n"
"                                                        \n"
;

/* Device selector read by main(): non-zero requests CL_DEVICE_TYPE_GPU,
 * zero requests CL_DEVICE_TYPE_CPU. */
static const int GPU = 1;

int main(int argc, char** argv)
{
    int err;

    cl_float input[16];
    cl_float output[16];

    size_t global = 16;
    size_t local = 16;

    cl_platform_id platform_id;
    cl_device_id device_id;
    cl_context context;
    cl_command_queue command_queue;
    cl_program program;
    cl_kernel kernel;
    cl_mem input_buf;
    cl_mem output_buf;

    err = clGetPlatformIDs(1, &platform_id, NULL);
    if(err != CL_SUCCESS)
    {
        printf("error: clGetPlatformIDs\n");
        return EXIT_FAILURE;
    }

    err = clGetDeviceIDs(platform_id, GPU ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
    if(err != CL_SUCCESS)
    {
        printf("error: clGetDeviceIDs\n");
        return EXIT_FAILURE;
    }

    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    if(err != CL_SUCCESS)
    {
        printf("error: clCreateContext\n");
        return EXIT_FAILURE;
    }

    command_queue = clCreateCommandQueue(context, device_id, 0, &err);
    if(err != CL_SUCCESS)
    {
        printf("error: clCreateCommandQueue\n");
        return EXIT_FAILURE;
    }

    program = clCreateProgramWithSource(context, 1, &SOURCE, NULL, &err);
    if(err != CL_SUCCESS)
    {
        printf("error: clCreateProgramWithSource\n");
        return EXIT_FAILURE;
    }

    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("error: clBuildProgram\n");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);

        return EXIT_FAILURE;
    }

    kernel = clCreateKernel(program, "test", &err);
    if(err != CL_SUCCESS)
    {
        printf("error: clCreateKernel\n");
        return EXIT_FAILURE;
    }

    input_buf = clCreateBuffer(context, CL_MEM_READ_ONLY, 16*sizeof(cl_float), NULL, NULL);
    output_buf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 16*sizeof(cl_float), NULL, NULL);
    if(!input_buf || !output_buf)
    {
        printf("error: clCreateBuffer\n");
        return EXIT_FAILURE;
    }

    err = clEnqueueWriteBuffer(command_queue, input_buf, CL_TRUE, 0, 16*sizeof(cl_float), input, 0, NULL, NULL);
    if(err != CL_SUCCESS)
    {
        printf("error: clEnqueueWriteBuffer\n");
        return EXIT_FAILURE;
    }

    err = 0;
    err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_buf);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_buf);
    if(err != CL_SUCCESS)
    {
        printf("error: clSetKernelArg\n");
        return EXIT_FAILURE;
    }

    err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
    if(err != CL_SUCCESS)
    {
        printf("error: clEnqueueNDRangeKernel\n");
        return EXIT_FAILURE;
    }

    clFinish(command_queue);

    err = clEnqueueReadBuffer(command_queue, output_buf, CL_TRUE, 0, 16*sizeof(cl_float), output, 0, NULL, NULL );
    if(err != CL_SUCCESS)
    {
        printf("error: clEnqueueReadBuffer\n");
        return EXIT_FAILURE;
    }

    clReleaseMemObject(input_buf);
    clReleaseMemObject(output_buf);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);

    printf("success\n");

    return EXIT_SUCCESS;
}
2
I'm surprised myself. I suspect that it is just Apple's implementation allowing for this. – user2425792
@Cicada I have added a self-contained test program that contains a variadic macro. It compiles and runs without problem on the platforms indicated above. – user2425792
On Linux, this works with AMD, doesn't work with NVIDIA or Intel. Aside from the spec not allowing them, there's no fundamental reason why they shouldn't work - it's just a simple preprocessor substitution. – jprice

2 Answers

3
votes

For posterity, I will answer this myself with a list of platforms that were tested for support of variadic macros. Future visitors, please feel free to add any platform you can test to this list.

Supported:

  • Mac OS X, Intel CPU, OpenCL 1.2, driver: 1.1
  • Mac OS X, Intel GPU, OpenCL 1.2, driver: 1.2(Dec 23 2014 00:18:31)
  • Mac OS X, ATI GPU, OpenCL 1.2, driver: 1.2 (Aug 17 2014 20:27:52)
  • Debian unstable, AMD GPU, OpenCL 2.0, driver: amdgpu-pro 16.15.2-277429
  • Mac OS X, Nvidia GPU, OpenCL 1.2, driver: 10.2.7 310.41.25f01
  • Redhat Enterprise Linux 5, Nvidia CUDA SDK 4.0, OpenCL 1.0 CUDA, driver: 260.19.26

Unsupported:

  • Debian unstable, Intel GPU, OpenCL 1.2, driver: Beignet 1.1.1
0
votes

Unsupported:

  • Win10 x64 2004, Intel CPU & GPU, OpenCL 2.1
  • Win10 x64 2004, NVIDIA GPU, OpenCL 1.2 CUDA 11.0.140