I am implementing a ray tracer using openCL. I have installed NVidia's CUDA sdk and everything seems to be set up fine, both my platforms are detected (Intel's and Nvidia's), and each one sees its devices (intel has HD Graphics 4000 and Nvidia has my GPU: GeForce GT 630M).
My problem is that I am able to run my application using the Intel platform but not using Nvidia's platform. I don't believe the problem is in my code, but here is my device code:
#include "constants.h" //only a couple of #define
typedef struct Sphere {
float x, y, z;
float radius;
float r, g, b;
}Sphere;
float hit(Sphere s, float ox, float oy, float *n) {
float radius = s.radius;
float dx = ox - s.x;
float dy = oy - s.y;
if (dx*dx + dy*dy < radius*radius) {
float dz = sqrt(radius*radius - dx*dx - dy*dy);
*n = dz / sqrt(radius * radius);
return dz + s.z;
}
return -INF;
}
__kernel void rayTracer(__global Sphere* spheres, write_only image2d_t res) {
// Get the index of the current element to be processed
int x = get_global_id(0);
int y = get_global_id(1);
int ox = x - WIDTH / 2;
int oy = y - HEIGHT / 2;
float r = 0, g = 0, b = 0;
float maxz = (float) -INF;
for (int i = 0; i<NUM_SPHERES; i++)
{
float n;
float t = hit(spheres[i], ox, oy, &n);
if (t > maxz)
{
float fscale = 1;
r = spheres[i].r * fscale;
g = spheres[i].g * fscale;
b = spheres[i].b * fscale;
}
}
write_imagei(res, (int2)(x, y), (int4)(r, g, b, 0));
}
My host application is also straightforward. I simply initialize openCL structures, setup the data and then read it back.
Again, when using the Intel platform my application runs fine and I can see the raytraced image. When using Nvidia's, although the API error codes are always 0, no result is displayed.
Does anybody have any Ideas what might be the problem?
Thanks in advance
---EDIT---
Here are some pieces of host code
Setting up OpenCL structures:
//Setup OpenCL
cl_platform_id platform = getPlatforms();
cl_device_id device = getDevices(platform, CL_DEVICE_TYPE_GPU);
cl_context_properties ctxProps[] =
{
CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
0, 0
};
cl_context ctx = clCreateContext(ctxProps, 1, &device, NULL, NULL, &err);
cl_command_queue queue1 = clCreateCommandQueue(ctx, device, NULL, &err);
GetPlatforms and GetDevices are functions that asks the user to chose a platform and device
Creating the program and building it:
cl_program prog = clCreateProgramWithSource(ctx, 1, srcs, &srcSize, &err);
err = clBuildProgram(prog, 1, &device, NULL, NULL, NULL);
if (err < 0)
{
//PRINT BUILD ERROR
size_t log_size;
clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char* log = (char*)calloc(log_size + 1, sizeof(char));
clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, log_size + 1, log, NULL);
printf("%s/n", log);
free(log);
std::cin >> err;
return 1;
}
cl_kernel krn = clCreateKernel(prog, "rayTracer", &err);
//....CREATE SOME SPHERES...
//Setup device data
cl_image_format fmt;
fmt.image_channel_order = CL_RGBA;
fmt.image_channel_data_type = CL_UNSIGNED_INT8;
cl_mem spheresBuff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, spheres.size() * sizeof(Sphere), spheres.data(), &err);
cl_mem resBuff = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, WIDTH, HEIGHT, 0, NULL, &err);
//Setup kernel arguments
err = clSetKernelArg(krn, 0, sizeof(cl_mem), (void*)&spheresBuff);
err = clSetKernelArg(krn, 1, sizeof(cl_mem), (void*)&resBuff);
//Run kernel
size_t gSize[] = { WIDTH, HEIGHT };
err = clEnqueueNDRangeKernel(queue1, krn, 2, NULL, gSize, NULL, 0, NULL, NULL);
//Read result
Image img = createRGBAImage(WIDTH, HEIGHT);
size_t origin[] = { 0, 0, 0 };
size_t region[] = { WIDTH , HEIGHT , 1 };
err = clEnqueueReadImage(queue1, resBuff, CL_TRUE, origin, region, 0, 0, img.pixel.data(), 0, NULL, NULL);
write_imageuiinstead ofwrite_imageisince your image is aCL_UNSIGNED_INT8, although I'm not sure how much this will matter in practice. - jprice