4
votes

I am using CUDA to generate this ABGR output image. The image in question is stored in a uchar4 array. Each element of the array represents the color of each pixel in the image. Obviously, this output array is a 2D image but it is allocated in CUDA as a linear memory of interleaved bytes.

I know that CUDA can easily map this array to an OpenGL Vertex Buffer Object. My question is, assuming that I have the RGB value of every pixel in an image, along with the width and height of the image, how can I draw this image to screen using OpenGL?
I know that some kind of shader must be involved, but since I know very little about OpenGL, I have no idea how a shader can take the color of each pixel and map it to the correct pixel on screen.

I know I should increase my knowledge in OpenGL, but this seems like a trivial task. If there is an easy way for me to draw this image, I'd rather not spend much time learning OpenGL.

1
Just draw a sprite (a quad, consisting of 2 triangles, with your texture mapped over it). – Ivan Aksamentov - Drop
@Drop how can I do that? – Maghoumi
Have you looked at any of the CUDA/OpenGL interop sample codes? If you just have raw pixel data (seems to be your case), the image interop example may be of interest. If you want to actually manipulate geometry, the simple OpenGL sample may be of interest. – Robert Crovella

1 Answer

6
votes

I finally figured out an easy way to do what I wanted. Unfortunately, I did not know about the existence of the sample that Robert was talking about on NVIDIA's website.

Long story short, the easiest way to draw the image was to define a Pixel Buffer Object in OpenGL, register the buffer with CUDA and pass it as an output array of uchar4 to the CUDA kernel. Here is a quick pseudo-code based on JOGL and JCUDA that shows the steps involved. Most of the code was obtained from the sample on NVIDIA's website:

1) Creating the OpenGL buffers

// Allocate the OpenGL buffer object that the CUDA kernel will write its pixels into,
// then register it with CUDA so it can later be mapped to a device pointer.
GL2 gl = drawable.getGL().getGL2();

// Generate one buffer name and remember it in the glBuffer field
int[] buffer = new int[1];
gl.glGenBuffers(1, IntBuffer.wrap(buffer));
glBuffer = buffer[0];

// 4 bytes per pixel (one uchar4); computed as long so that a large
// imageWidth * imageHeight cannot overflow int before the multiplication by 4
long bufferSizeInBytes = (long) imageWidth * imageHeight * 4;

// Bind the buffer as a pixel-unpack buffer, which is the target the display loop
// uses when it copies the buffer into the texture with glTexSubImage2D
gl.glBindBuffer(GL2.GL_PIXEL_UNPACK_BUFFER, glBuffer);
// Reserve the storage only (no data is pre-loaded in this buffer)
gl.glBufferData(GL2.GL_PIXEL_UNPACK_BUFFER, bufferSizeInBytes, (Buffer)null, GL2.GL_DYNAMIC_DRAW);
gl.glBindBuffer(GL2.GL_PIXEL_UNPACK_BUFFER, 0);

// The bufferResource is of type CUgraphicsResource and is defined as a class field
this.bufferResource = new CUgraphicsResource();

// Register the buffer in CUDA; the NONE flag gives no hint about how it will be used
cuGraphicsGLRegisterBuffer(bufferResource, glBuffer, CUgraphicsMapResourceFlags.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);

2) Initialize the texture and set texture parameters

// Create the texture that the pixel buffer is copied into before it is drawn.
GL2 gl = drawable.getGL().getGL2();

// Generate one texture name and remember it in the glTexture field
int[] texture = new int[1];
gl.glGenTextures(1, IntBuffer.wrap(texture));
this.glTexture = texture[0];

gl.glBindTexture(GL2.GL_TEXTURE_2D, glTexture);

// Linear filtering for both minification and magnification; setting the MIN filter
// to a non-mipmap mode matters because only mip level 0 is ever allocated
gl.glTexParameteri(GL2.GL_TEXTURE_2D, GL2.GL_TEXTURE_MIN_FILTER, GL2.GL_LINEAR);
gl.glTexParameteri(GL2.GL_TEXTURE_2D, GL2.GL_TEXTURE_MAG_FILTER, GL2.GL_LINEAR);

// Allocate RGBA8 storage without uploading any pixels (null data). The client format
// is declared as GL_RGBA so that it matches the format used by the per-frame
// glTexSubImage2D upload.
gl.glTexImage2D(GL2.GL_TEXTURE_2D, 0, GL2.GL_RGBA8, imageWidth, imageHeight, 0, GL2.GL_RGBA, GL2.GL_UNSIGNED_BYTE, (Buffer)null);

gl.glBindTexture(GL2.GL_TEXTURE_2D, 0);

3) Run the CUDA kernel and display the results in OpenGL's display loop.

// Let the CUDA kernel fill the pixel buffer for this frame.
// (The method is declared as runCuda(GLAutoDrawable); Java names are case-sensitive.)
this.runCuda(drawable);

GL2 gl = drawable.getGL().getGL2();

// Copy the pixel buffer into the texture. While a buffer is bound to
// GL_PIXEL_UNPACK_BUFFER, the last argument of glTexSubImage2D is a byte offset
// into that buffer, not a client-memory pointer.
gl.glBindBuffer(GL2.GL_PIXEL_UNPACK_BUFFER, glBuffer);
gl.glBindTexture(GL2.GL_TEXTURE_2D, glTexture);
gl.glTexSubImage2D(GL2.GL_TEXTURE_2D, 0, 0, 0,
                imageWidth, imageHeight,
                GL2.GL_RGBA, GL2.GL_UNSIGNED_BYTE, 0L); // The last argument must be the offset ZERO, NOT null!
gl.glBindBuffer(GL2.GL_PIXEL_UNPACK_BUFFER, 0);

// Fixed-function state for drawing the texture unmodified (the texture is still bound)
gl.glEnable(GL2.GL_TEXTURE_2D);
gl.glDisable(GL2.GL_DEPTH_TEST);
gl.glDisable(GL2.GL_LIGHTING);
gl.glTexEnvf(GL2.GL_TEXTURE_ENV, GL2.GL_TEXTURE_ENV_MODE, GL2.GL_REPLACE);

// Orthographic projection over [-1, 1] in x and y; the previous projection is
// saved here and restored after the quad has been drawn
gl.glMatrixMode(GL2.GL_PROJECTION);
gl.glPushMatrix();
gl.glLoadIdentity();
gl.glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);

gl.glMatrixMode(GL2.GL_MODELVIEW);
gl.glLoadIdentity();

gl.glViewport(0, 0, imageWidth, imageHeight);

// One quad covering the whole viewport. The t texture coordinate is flipped
// (t = 1 at the bottom edge, t = 0 at the top edge), so the first row of the
// buffer is drawn at the top of the screen.
gl.glBegin(GL2.GL_QUADS);
    gl.glTexCoord2f(0.0f, 1.0f);
    gl.glVertex2f(-1.0f, -1.0f);

    gl.glTexCoord2f(1.0f, 1.0f);
    gl.glVertex2f(1.0f, -1.0f);

    gl.glTexCoord2f(1.0f, 0.0f);
    gl.glVertex2f(1.0f, 1.0f);

    gl.glTexCoord2f(0.0f, 0.0f);
    gl.glVertex2f(-1.0f, 1.0f);
gl.glEnd();

// Restore the projection matrix saved above
gl.glMatrixMode(GL2.GL_PROJECTION);
gl.glPopMatrix();

gl.glDisable(GL2.GL_TEXTURE_2D);

3.5) The CUDA call:

/**
 * Maps the registered OpenGL buffer into CUDA, launches the kernel that writes the
 * image into it, waits for the kernel to finish and unmaps the buffer again so that
 * OpenGL can read it.
 *
 * The buffer is unmapped in a finally block: if the pointer lookup, the launch or
 * the synchronization throws, the buffer would otherwise stay mapped.
 *
 * @param drawable the current drawable; not used by the code shown here
 */
public void runCuda(GLAutoDrawable drawable) {

    devOutput = new CUdeviceptr();
    CUgraphicsResource[] resources = new CUgraphicsResource[]{bufferResource};

    // Map the OpenGL buffer to a resource and then obtain a CUDA pointer to that resource
    cuGraphicsMapResources(1, resources, null);
    try {
        // The mapped size is written into the long[1] and is not needed here
        cuGraphicsResourceGetMappedPointer(devOutput, new long[1], bufferResource);

        // Setup the kernel parameters making sure that the devOutput pointer is passed to the kernel
        Pointer kernelParams = 
                                .
                                .
                                .
                                .

        // One thread per pixel, rounded up to a whole number of blocks
        int gridSize = (int) Math.ceil(imageWidth * imageHeight / (double)DESC_BLOCK_SIZE);

        cuLaunchKernel(function,
                gridSize, 1, 1,
                DESC_BLOCK_SIZE, 1, 1,
                0, null,
                kernelParams, null);
        // Wait for the kernel to finish before the buffer is handed back to OpenGL
        cuCtxSynchronize();
    } finally {
        // Unmap the buffer so that it can be used in OpenGL
        cuGraphicsUnmapResources(1, resources, null);
    }
}

PS: I thank Robert for providing the link to the sample. I also thank the people who downvoted my question without any useful feedback!