1
votes

I'm trying to add soft shadows to a modified Doom3 engine using an FBO with a stencil texture attachment that I bind and use in the light interaction fragment shader. It works well enough, but there's a serious performance problem on a Radeon 460 (I don't have any other AMD GPUs, but I suspect it's the same or worse on them, since this card is relatively new).

I'm on the latest drivers.

The fps drop is so bad that it's actually faster to do qglCopyTexImage2D to another texture (for each light!) than to bind the stencil texture used in the FBO.

Another problem is that when I try to optimize qglCopyTexImage2D by using qglCopyTexSubImage2D instead, the image starts to flicker.

Any real-use advice on stencil texture from fellow programmers?

Both nVidia and Intel appear to perform well here with regard to speed.

        globalImages->currentRenderImage->Bind();
        globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
        globalImages->currentRenderImage->uploadHeight = curHeight;
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined

        globalImages->currentRenderFbo->Bind();
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined

        if ( glConfig.vendor != glvAny ) { 
            globalImages->currentStencilFbo->Bind();
            globalImages->currentStencilFbo->uploadWidth = curWidth;
            globalImages->currentStencilFbo->uploadHeight = curHeight;
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
        }

        globalImages->currentDepthImage->Bind();
        globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
        globalImages->currentDepthImage->uploadHeight = curHeight;
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        if ( glConfig.vendor == glvIntel ) { // FIXME allow 24-bit depth for low-res monitors
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
        } else {
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
        }
    }

    // (re-)attach textures to FBO
    if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) {
        // create a framebuffer object, you need to delete them when program exits.
        if ( !fboId )
            qglGenFramebuffers( 1, &fboId );
        qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
        // attach a texture to FBO color attachement point
        qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
        // attach a renderbuffer to depth attachment point
        GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
        qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
        if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
            qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
        else
            qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
        int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
        if ( GL_FRAMEBUFFER_COMPLETE != status ) { // something went wrong, fall back to default
            common->Printf( "glCheckFramebufferStatus %d\n", status );
            qglDeleteFramebuffers( 1, &fboId );
            fboId = 0; // try from scratch next time
            r_useFbo.SetBool( false );
        }
        qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be 
    }
    qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
    qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
    fboUsed = true;
    GL_CheckErrors();
}

/*
 Soft shadows vendor specific implementation
 Intel: separate stencil, direct access, fastest
 nVidia: combined stencil & depth, direct access, fast
 AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
 */

void FB_CopyStencil() { // duzenko: why, AMD? WHY?? 
    if ( glConfig.vendor != glvAMD || !r_softShadows.GetBool() )
        return;
    globalImages->currentStencilFbo->Bind();
    qglCopyTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, 0, 0, glConfig.vidWidth, glConfig.vidHeight, 0 );
    /*globalImages->currentDepthFbo->Bind();
    idScreenRect& r = backEnd.currentScissor;
    //qglCopyTexSubImage2D( GL_TEXTURE_2D, 0, r.x1, r.y1, r.x1, r.y1, r.x2 - r.x1 + 1, r.y2 - r.y1 + 1 );*/
    GL_CheckErrors();
}

void FB_BindStencilTexture() {
    const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
    idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
    stencil->Bind();
    if ( glConfig.vendor != glvIntel )
        glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );
}
1

1 Answer

0
votes

I ended up with two framebuffers: one for shadows only and the other for everything else. The shadow texture is an FBO attachment in the former and bound as texture2D in the latter.