1
votes

Suppose I already have a bitmap (an image) in Device (Video) Memory using cudaMalloc() and cudaMemcpy().

What is the simplest way to display this bitmap directly on screen (current window)?

I am guessing there should be an alternative to GDI's BitBlt()... but any approach (Direct3D, OpenGL or even GDI) will be fine.

The examples in the CUDA SDK (OpenGL version) use a texture whose data is held in a buffer (Pixel Buffer Object); that buffer is registered as a CUDA resource and has to be mapped/unmapped on every frame (each glutDisplayFunc() call). And all that seems just a tiny little bit complicated and uncalled for.

1
"And all that seems just a tiny little bit complicated and uncalled for." Tough; that's the way it is. CUDA cannot draw anything. And CUDA lives in its own little world. To draw something, you need to get the data out of CUDA's world and into the world of something that can actually draw things. It would have been better to directly generate the CUDA data into an OpenGL or D3D object. - Nicol Bolas
Yes, I understand that CUDA is for computing only. If you like, you can forget about CUDA, because the main thing is that we have an address of linear located device memory that holds bitmap. All I need is to bit blit this bitmap to Framebuffer. I am guessing Direct2D could be one of the solutions, but unfortunately it's supported only in Windows 7/Vista. - AlexP
I'm pretty sure Direct2D does not take pointers to "linear located Device Memory" in its blitting functions. It takes objects that represent those pointers. That's why the conversion layer is necessary; most drawing APIs do not just let you have pointers to GPU memory and blit from them. And even if they did, there's no guarantee that the CUDA pointer is the same kind of pointer that would be used by the other API(s). - Nicol Bolas
Got it! Guess I have no other choice but to do it the hard (and only) way... oh, well. - AlexP

1 Answer

2
votes

Okay, will try to answer my own question. Looks like there is a simpler way for displaying a bitmap via OpenGL. Instead of using Texture + Pixel Buffer Object, we could use cudaGraphicsGLRegisterImage() that supports texture format GL_RGBA8 since CUDA Toolkit 4.0.

For simplicity, example has no cutilSafeCall() or error checks. I have little knowledge of OpenGL and will be glad to hear recommendations.

#include <stdio.h>
#include <string.h>

#include <GL/glew.h>
#include <GL/freeglut.h>

#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>

/* CUDA graphics-resource handle for the OpenGL texture shared with CUDA.
 * It is written by cudaGraphicsGLRegisterImage() in resizeTexture() and
 * mapped/unmapped in editTexture(). */
cudaGraphicsResource *cuda_texture;

/* Registers (+ resizes) CUDA-Texture (aka Renderbuffer). */
void resizeTexture(int w, int h) {
    static GLuint gl_texture = 0;

    /* Delete old CUDA-Texture. */
    if (gl_texture) {
        cudaGraphicsUnregisterResource(cuda_texture);
        glDeleteTextures(1, &gl_texture);
    } else glEnable(GL_TEXTURE_2D);

    glGenTextures(1, &gl_texture);
    glBindTexture(GL_TEXTURE_2D, gl_texture);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

    cudaGraphicsGLRegisterImage(&cuda_texture, gl_texture, GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard);
}

/* Fills a w x h RGBA8 CUDA array with a test pattern built on the host.
 *
 * Every byte of the host image is first set to 128; then the red and blue
 * channels of each pixel are overwritten with 255 * x / w, which gives a
 * horizontal ramp. The image is copied row by row into memDevice.
 *
 * memDevice - destination CUDA array (must hold at least w x h 4-byte texels).
 * w, h      - image size in pixels; nothing is done if either is <= 0.
 *
 * A failed copy is reported on stderr. */
void updateMemDevice(cudaArray *memDevice, int w, int h) {
    struct Color {
        unsigned char r, g, b, a;
    };

    if (memDevice == NULL || w <= 0 || h <= 0) {
        return;
    }

    /* Do the size arithmetic in size_t so large images do not overflow int. */
    const size_t pixelCount = (size_t)w * (size_t)h;
    const size_t rowBytes = (size_t)w * sizeof(Color);

    Color *memHost = new Color[pixelCount];

    memset(memHost, 128, pixelCount * sizeof(Color));
    for (int y = 0; y < h; ++y) {
        for (int x = 0; x < w; ++x) {
            Color &c = memHost[(size_t)y * w + x];
            c.r = c.b = (unsigned char)(255 * x / w);
        }
    }

    /* cudaMemcpyToArray is deprecated; the 2D variant copies h rows of
     * rowBytes bytes from the tightly packed host image. */
    cudaError_t copyErr = cudaMemcpy2DToArray(memDevice, 0, 0, memHost, rowBytes, rowBytes,
                                              (size_t)h, cudaMemcpyHostToDevice);
    if (copyErr != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy2DToArray failed: %s\n", cudaGetErrorString(copyErr));
    }

    delete [] memHost;
}

void editTexture(int w, int h) {
    cudaGraphicsMapResources(1, &cuda_texture);
    cudaArray* memDevice;
    cudaGraphicsSubResourceGetMappedArray(&memDevice, cuda_texture, 0, 0);
    updateMemDevice(memDevice, w, h);
    cudaGraphicsUnmapResources(1, &cuda_texture);
}


void windowResizeFunc(int w, int h) {
    glViewport(-w, -h, w*2, h*2);
}

void displayFunc() {
    glBegin(GL_QUADS);
    glTexCoord2i(0, 0); glVertex2i(0, 0);
    glTexCoord2i(1, 0); glVertex2i(1, 0);
    glTexCoord2i(1, 1); glVertex2i(1, 1);
    glTexCoord2i(0, 1); glVertex2i(0, 1);
    glEnd();

    glFlush();
}


int main(int argc, char *argv[]) {
    /* Initialize OpenGL context. */
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB);
    glutInitWindowSize(400, 300);
    glutCreateWindow("Bitmap in Device Memory");
    glutReshapeFunc(windowResizeFunc);
    glutDisplayFunc(displayFunc);

    glewInit();

    cudaGLSetGLDevice(0);

    int width = 5, height = 5;
    resizeTexture(width, height);
    editTexture(width, height);

    glutMainLoop();
    return 0;
}