I've been reading through the CUDA documentation and it seems to me, that every buffer that needs to interface with OpenGL needs to be created in the glBuffer.
According to the nvidia programming guide, this has to be done like this:
GLuint positionsVBO;
struct cudaGraphicsResource* positionsVBO_CUDA;
int main() {
// Explicitly set device
cudaGLSetGLDevice(0);
// Initialize OpenGL and GLUT
...
glutDisplayFunc(display);
// Create buffer object and register it with CUDA
glGenBuffers(1, positionsVBO);
glBindBuffer(GL_ARRAY_BUFFER, &vbo);
unsigned int size = width * height * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
cudaGraphicsGLRegisterBuffer(&positionsVBO_CUDA, positionsVBO, cudaGraphicsMapFlagsWriteDiscard);
// Launch rendering loop
glutMainLoop();
}
void display() {
// Map buffer object for writing from CUDA
float4* positions;
cudaGraphicsMapResources(1, &positionsVBO_CUDA, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&positions, &num_bytes, positionsVBO_CUDA));
// Execute kernel
dim3 dimBlock(16, 16, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
createVertices<<<dimGrid, dimBlock>>>(positions, time, width, height);
// Unmap buffer object
cudaGraphicsUnmapResources(1, &positionsVBO_CUDA, 0);
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, positionsVBO);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, width * height);
glDisableClientState(GL_VERTEX_ARRAY);
// Swap buffers
glutSwapBuffers();
glutPostRedisplay();
}
void deleteVBO() {
cudaGraphicsUnregisterResource(positionsVBO_CUDA);
glDeleteBuffers(1, &positionsVBO);
}
__global__ void createVertices(float4* positions, float time, unsigned int width, unsigned int height) {
// [....]
}
Is there a way to give the cudaMalloc created memory space directly to OpenGL? I've got already working code written on cuda and I want to put my float4 array directly into OpenGL.
Say if've got already code like:
float4 *cd = (float4*) cudaMalloc(elements*sizeof(float4)).
do_something<<<16,1>>>(cd);
And I wanted to display the output of do_something through OpenGL.
Side note: why is the cudaGraphicsResourceGetMappedPointer function run on every timestep?