I'm trying to add soft shadows to a modified Doom3 engine using FBO + stencil texture attachment that I bind and use in the light interaction fragment shader. It works good enough, but there's a serious performance problem on a Radeon 460 (I don't have other AMD GPU's but suspect it's same or worse since it's relatively new).
I'm on the latest drivers.
The fps drop is so bad that it's actually faster to do qglCopyTexImage2D
to another texture (per each light!) than bind the stencil texture used in FBO.
Another problem is that when I try to optimize qglCopyTexImage2D
with qglCopyTexSubImage2D
it's starting to flicker.
Any real-use advice on stencil texture from fellow programmers?
Both nVidia and Intel appear to perform well in regard of speed here.
globalImages->currentRenderImage->Bind();
globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentRenderImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
globalImages->currentRenderFbo->Bind();
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined
if ( glConfig.vendor != glvAny ) {
globalImages->currentStencilFbo->Bind();
globalImages->currentStencilFbo->uploadWidth = curWidth;
globalImages->currentStencilFbo->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
}
globalImages->currentDepthImage->Bind();
globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
globalImages->currentDepthImage->uploadHeight = curHeight;
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
if ( glConfig.vendor == glvIntel ) { // FIXME allow 24-bit depth for low-res monitors
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
} else {
qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
}
}
// (re-)attach textures to FBO
if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) {
// create a framebuffer object, you need to delete them when program exits.
if ( !fboId )
qglGenFramebuffers( 1, &fboId );
qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
// attach a texture to FBO color attachement point
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
// attach a renderbuffer to depth attachment point
GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
else
qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
if ( GL_FRAMEBUFFER_COMPLETE != status ) { // something went wrong, fall back to default
common->Printf( "glCheckFramebufferStatus %d\n", status );
qglDeleteFramebuffers( 1, &fboId );
fboId = 0; // try from scratch next time
r_useFbo.SetBool( false );
}
qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be
}
qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
fboUsed = true;
GL_CheckErrors();
}
/*
Soft shadows vendor specific implementation
Intel: separate stencil, direct access, fastest
nVidia: combined stencil & depth, direct access, fast
AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
*/
void FB_CopyStencil() { // duzenko: why, AMD? WHY??
if ( glConfig.vendor != glvAMD || !r_softShadows.GetBool() )
return;
globalImages->currentStencilFbo->Bind();
qglCopyTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, 0, 0, glConfig.vidWidth, glConfig.vidHeight, 0 );
/*globalImages->currentDepthFbo->Bind();
idScreenRect& r = backEnd.currentScissor;
//qglCopyTexSubImage2D( GL_TEXTURE_2D, 0, r.x1, r.y1, r.x1, r.y1, r.x2 - r.x1 + 1, r.y2 - r.y1 + 1 );*/
GL_CheckErrors();
}
void FB_BindStencilTexture() {
const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
stencil->Bind();
if ( glConfig.vendor != glvIntel )
glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );
}