Using various tutorials/examples/documentations/forums online, I have typed out code to allow CUDA to manipulate OpenGL textures such that it can be outputted to the screen. My method of displaying is to use PBO and an allocated texture image of uchar4 array. Despite all my attempts at fixing the problem, the texture would not show up on the 2D surface. I cannot seem to pinpoint the problem.
These are all the things I have checked/done thus far: I have created a PBO and registered it with CUDA, called cudaGraphicsResourceGetMappedPointer and the unmapping equivalent before and after the GPU function calls, made sure that glEnable is called for 2D_TEXTURE, glDisable called for any unnecessary values, unbinded textures/buffers when not in need. I have also reset SFML OpenGL states in case SFML was the cause. Square textures have also been employed. My OpenGL verision and CUDA version work for all function calls I use.
There did not seem to be any errors within the program when I checked cudaErrors and OpenGL Errors.
I am not sure if this has something to do with it but when I call:
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
My quad does not seem to display.
I have mainly found inspiration from this website. Thank you very much!
Here is my code:
Main.cpp
#include <GL/glew.h>
#include <windows.h>
#include <GL/GL.h>
#include <SFML/Window.hpp>
#include <SFML/OpenGL.hpp>
#include <SFML/System.hpp>
#include <SFML/Graphics/RenderWindow.hpp>
#include "GeneralTypedef.h"
#include "OpenGLTest.cuh"
int main()
{
// create the window
sf::RenderWindow window(sf::VideoMode(1024, 1024), "OpenGL");
//window.setVerticalSyncEnabled(true);
sf::Vector2u windowSize;
windowSize = sf::Vector2u(window.getSize());
bool running = true;
glewInit();
window.resetGLStates();
std::printf("OpenGL: %s:", glGetString(GL_VERSION));
// We will not be using SFML's gl states.
OpenGLTest* test = new OpenGLTest(window.getSize());
sf::Time time;
while (running)
{
// handle events
sf::Event event;
while (window.pollEvent(event))
{
if (event.type == sf::Event::Closed)
{
// end the program
running = false;
}
else if (event.type == sf::Event::Resized)
{
// adjust the viewport when the window is resized
glViewport(0, 0, event.size.width, event.size.height);
windowSize = window.getSize();
}
}
// clear the buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
test->createFrame(time.asMicroseconds());
test->drawFrame();
window.display();
}
// release resources...
delete test;
return 0;
}
OpenGLTest.cuh
#ifndef OPENGLTEST_CUH
#define OPENGLTEST_CUH
#include <GL/glew.h>
#include <windows.h>
#include <GL/GL.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <SFML/OpenGL.hpp>
#include <SFML/Graphics.hpp>
#include <SFML/System.hpp>
#include "GeneralTypedef.h"
class OpenGLTest
{
public:
uchar4* image;
GLuint gltexture;
GLuint pbo;
cudaGraphicsResource_t cudaPBO;
uchar4* d_textureBufferData;
sf::Vector2u windowSize;
OpenGLTest(sf::Vector2u windowSize)
{
this->windowSize = sf::Vector2u(windowSize);
this->setupOpenGL();
};
~OpenGLTest()
{
delete image;
image == nullptr;
cudaFree(d_textureBufferData);
d_textureBufferData == nullptr;
glDeleteTextures(1, &gltexture);
}
void drawFrame();
void createFrame(float time);
private:
void setupOpenGL();
};
#endif //OPENGLTEST_CUH
OpenGLTest.cu
#include "OpenGLTest.cuh"
__global__ void createGPUTexture(uchar4* d_texture)
{
uint pixelID = blockIdx.x*blockDim.x + threadIdx.x;
d_texture[pixelID].x = 0;
d_texture[pixelID].y = 1;
d_texture[pixelID].z = 1;
d_texture[pixelID].w = 0;
}
__global__ void wow(uchar4* pos, unsigned int width, unsigned int height,
float time)
{
int index = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int x = index%width;
unsigned int y = index / width;
if (index < width*height) {
unsigned char r = (x + (int)time) & 0xff;
unsigned char g = (y + (int)time) & 0xff;
unsigned char b = ((x + y) + (int)time) & 0xff;
// Each thread writes one pixel location in the texture (textel)
pos[index].w = 0;
pos[index].x = r;
pos[index].y = g;
pos[index].z = b;
}
}
void OpenGLTest::drawFrame()
{
glColor3f(1.0f,1.0f,1.0f);
glBindTexture(GL_TEXTURE_2D, gltexture);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, windowSize.x, windowSize.y, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glBegin(GL_QUADS);
glTexCoord2f(0.0f, 0.0f);
glVertex2f(0.0f, float(windowSize.y));
glTexCoord2f(1.0f, 0.0f);
glVertex2f(float(windowSize.x), float(windowSize.y));
glTexCoord2f(1.0f, 1.0f);
glVertex2f(float(windowSize.x), 0.0f);
glTexCoord2f(0.0f,1.0f);
glVertex2f(0.0f, 0.0f);
glEnd();
glFlush();
// Release
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
glBindTexture(GL_TEXTURE_2D, 0);
// Test Triangle
/*
glBegin(GL_TRIANGLES);
glColor3f(0.1, 0.2, 0.3);
glVertex2f(0, 0);
glVertex2f(10, 0);
glVertex2f(0, 100);
glEnd();
*/
}
void OpenGLTest::createFrame(float time)
{
cudaGraphicsMapResources(1, &cudaPBO, 0);
size_t numBytes;
cudaGraphicsResourceGetMappedPointer((void**)&d_textureBufferData, &numBytes, cudaPBO);
int totalThreads = windowSize.x * windowSize.y;
int nBlocks = totalThreads/ 256;
// Run code here.
createGPUTexture << <nBlocks, 256>> >(d_textureBufferData);
//wow << <nBlocks, 256 >> >(d_textureBufferData, windowSize.x, windowSize.y, time);
// Unmap mapping to PBO so that OpenGL can access.
cudaGraphicsUnmapResources(1, &cudaPBO, 0);
}
void OpenGLTest::setupOpenGL()
{
image = new uchar4[1024*1024];
glViewport(0, 0, windowSize.x, windowSize.y);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(0.0, windowSize.x, windowSize.y, 0.0, -1.0, 1.0);
glEnable(GL_TEXTURE_2D);
glDisable(GL_LIGHTING);
glDisable(GL_DEPTH_TEST);
// Unbind any textures from previous.
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
// Create new textures.
glGenTextures(1, &gltexture);
glBindTexture(GL_TEXTURE_2D, gltexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Create image with same resolution as window.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, windowSize.x , windowSize.y, 0, GL_RGBA, GL_UNSIGNED_BYTE, image);
// Create pixel buffer boject.
glGenBuffers(1, &pbo);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, windowSize.x * windowSize.y * sizeof(uchar4), image, GL_STREAM_COPY);
cudaGraphicsGLRegisterBuffer(&cudaPBO, pbo, cudaGraphicsMapFlagsNone);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
glBindTexture(GL_TEXTURE_2D, 0);
}
GeneralType
#ifndef GENERALTYPEDEF_CUH
#define GENERALTYPEDEF_CUH
typedef unsigned int uint;
#endif // GENERALTYPEDEF_CUH