0
votes

I've been stuck on this for the past week and can't figure out what is going on. I'm creating a game using Vulkan; however, after setting up the renderer I see some odd inaccuracies in the texture mapping of the render texture: it is scaled down to a quarter of the screen, which doesn't make sense to me. On top of that, when I updated my Nvidia driver to 388.0, the issue did not go away, and the regular textures started doing the same thing as well. This is the result of the final image display on an Nvidia GTX 870M with driver v388.0:

Also note that the screen texture you see before you has been "scaled" to the bottom right quarter of the screen (as if it had been resized to width/2 x height/2), which is not correct...

The renderer consists of one forward rendering pass, an HDR pass, and then a final pass that outputs to the swapchain image. The forward pass and the HDR pass each use their own command buffer to submit to the graphics queue, and they are chained together with semaphores.

  // Per-frame submission. Three stages are chained with semaphores:
  //   1. offscreen pass: waits on the swapchain's image-available semaphore,
  //      signals mOffscreen.semaphore;
  //   2. HDR pass: waits on mOffscreen.semaphore, signals mHDR.semaphore;
  //   3. swapchain command buffer: waits on mHDR.semaphore.
  VkCommandBuffer offscreenCmd = mOffscreen.cmdBuffer->Handle();
  VkSemaphore waitSemas[] = { mRhi->SwapchainObject()->ImageAvailableSemaphore() };
  VkSemaphore signalSemas[] = { mOffscreen.semaphore->Handle() };
  // Stage at which each submission waits on its semaphore.
  VkPipelineStageFlags waitFlags[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };

  // Submit info for the offscreen pass.
  VkSubmitInfo offscreenSI = {};
  offscreenSI.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  offscreenSI.pCommandBuffers = &offscreenCmd;
  offscreenSI.commandBufferCount = 1;
  offscreenSI.signalSemaphoreCount = 1;
  offscreenSI.pSignalSemaphores = signalSemas;
  offscreenSI.waitSemaphoreCount = 1;
  offscreenSI.pWaitSemaphores = waitSemas;
  offscreenSI.pWaitDstStageMask = waitFlags;

  // The HDR submit info starts as a copy of the offscreen one (same counts and
  // wait stage mask); only the command buffer and semaphores are replaced.
  VkSubmitInfo hdrSI = offscreenSI;
  VkSemaphore hdrWaits[] = { mOffscreen.semaphore->Handle() };
  VkSemaphore hdrSignal[] = { mHDR.semaphore->Handle() };
  VkCommandBuffer hdrCmd = mHDR.cmdBuffer->Handle();
  hdrSI.pCommandBuffers = &hdrCmd;
  hdrSI.pSignalSemaphores = hdrSignal;
  hdrSI.pWaitSemaphores = hdrWaits;

  // Update materials before rendering the frame.
  UpdateMaterials();

  // begin frame. This is where we start our render process per frame.
  BeginFrame();
  // Busy-wait until the offscreen command buffer is no longer recording and the
  // RHI reports its command buffers as complete.
  while (mOffscreen.cmdBuffer->Recording() || !mRhi->CmdBuffersComplete()) {}

    // Offscreen PBR Forward Rendering Pass.
    mRhi->GraphicsSubmit(offscreenSI);

    // High Dynamic Range and Gamma Pass.
    mRhi->GraphicsSubmit(hdrSI);

    // Before calling this cmd buffer, we want to submit our offscreen buffer first, then
    // send our signal to our swapchain cmd buffers.
    VkSemaphore waitSemaphores[] = { mHDR.semaphore->Handle() };
    mRhi->SubmitCurrSwapchainCmdBuffer(1, waitSemaphores);

    // Render the Overlay.
    RenderOverlay();

  EndFrame();

What's even more interesting is that when I ran the same code on an Intel Kaby Lake cpu with a 6th gen gpu and vulkan support, the output image was exactly correct, as expected!

So I am not sure whether this is a driver bug or not. Here is how I implemented the render textures:

void Renderer::SetUpRenderTextures()
{
  Texture* pbrColor = mRhi->CreateTexture();
  Texture* pbrDepth = mRhi->CreateTexture();
  Sampler* pbrSampler = mRhi->CreateSampler();
  Texture* hdrTexture = mRhi->CreateTexture();
  Sampler* hdrSampler = mRhi->CreateSampler();

  gResources().RegisterSampler("HDRGammaSampler", hdrSampler);
  gResources().RegisterRenderTexture("HDRGammaTexture", hdrTexture);
  gResources().RegisterRenderTexture("PBRColor", pbrColor);
  gResources().RegisterRenderTexture("PBRDepth", pbrDepth);
  gResources().RegisterSampler("PBRSampler", pbrSampler);

  VkImageCreateInfo cImageInfo = { };
  VkImageViewCreateInfo cViewInfo = { };

  cImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  cImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
  cImageInfo.imageType = VK_IMAGE_TYPE_2D;
  cImageInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
  cImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  cImageInfo.mipLevels = 1;
  cImageInfo.extent.depth = 1;
  cImageInfo.arrayLayers = 1;
  cImageInfo.extent.width = mWindowHandle->Width();
  cImageInfo.extent.height = mWindowHandle->Height();
  cImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
  cImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  cImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;

  cViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 
  cViewInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
  cViewInfo.image = nullptr; // No need to set the image, texture->Initialize() handles this for us.
  cViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
  cViewInfo.subresourceRange = { };
  cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  cViewInfo.subresourceRange.baseArrayLayer = 0;
  cViewInfo.subresourceRange.baseMipLevel = 0;
  cViewInfo.subresourceRange.layerCount = 1;
  cViewInfo.subresourceRange.levelCount = 1;

  pbrColor->Initialize(cImageInfo, cViewInfo);
  // Using the same info, only we are chaning the format to rgba8 unorm attachments
  cImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  cViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  hdrTexture->Initialize(cImageInfo, cViewInfo);

  cImageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
  cImageInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;

  cViewInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;
  cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;

  pbrDepth->Initialize(cImageInfo, cViewInfo);

  VkSamplerCreateInfo samplerCI = { };
  samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
  samplerCI.magFilter = VK_FILTER_LINEAR;
  samplerCI.minFilter = VK_FILTER_LINEAR;
  samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
  samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
  samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
  samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
  samplerCI.compareEnable = VK_FALSE;
  samplerCI.mipLodBias = 0.0f;
  samplerCI.maxAnisotropy = 16.0f;
  samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
  samplerCI.maxLod = 1.0f;
  samplerCI.minLod = 0.0f;
  samplerCI.unnormalizedCoordinates = VK_FALSE;

  pbrSampler->Initialize(samplerCI);
  hdrSampler->Initialize(samplerCI);

  Sampler* defaultSampler = mRhi->CreateSampler();
  defaultSampler->Initialize(samplerCI);
  gResources().RegisterSampler("DefaultSampler", defaultSampler);

  VkImageCreateInfo dImageInfo = {};
  VkImageViewCreateInfo dViewInfo = {};

  dImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  dImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
  dImageInfo.imageType = VK_IMAGE_TYPE_2D;
  dImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  dImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  dImageInfo.mipLevels = 1;
  dImageInfo.extent.depth = 1;
  dImageInfo.arrayLayers = 1;
  dImageInfo.extent.width = mWindowHandle->Width();
  dImageInfo.extent.height = mWindowHandle->Height();
  dImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
  dImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  dImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;

  dViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
  dViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  dViewInfo.image = nullptr; // No need to set the image, texture handles this for us.
  dViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
  dViewInfo.subresourceRange = {};
  dViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  dViewInfo.subresourceRange.baseArrayLayer = 0;
  dViewInfo.subresourceRange.baseMipLevel = 0;
  dViewInfo.subresourceRange.layerCount = 1;
  dViewInfo.subresourceRange.levelCount = 1;

  Texture* defaultTexture = mRhi->CreateTexture();

  defaultTexture->Initialize(dImageInfo, dViewInfo);
  gResources().RegisterRenderTexture("DefaultTexture", defaultTexture);
}

And the FrameBuffers:

void Renderer::SetUpFrameBuffers()
{
  Texture* pbrColor = gResources().GetRenderTexture("PBRColor");
  Texture* pbrDepth = gResources().GetRenderTexture("PBRDepth");

  FrameBuffer* pbrFrameBuffer = mRhi->CreateFrameBuffer();
  gResources().RegisterFrameBuffer("PBRFrameBuffer", pbrFrameBuffer);

  FrameBuffer* hdrFrameBuffer = mRhi->CreateFrameBuffer();
  gResources().RegisterFrameBuffer("HDRGammaFrameBuffer", hdrFrameBuffer);


  VkAttachmentDescription attachmentDescriptions[2];
  attachmentDescriptions[0].format = VK_FORMAT_R16G16B16A16_SFLOAT;
  attachmentDescriptions[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  attachmentDescriptions[0].finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  attachmentDescriptions[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
  attachmentDescriptions[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
  attachmentDescriptions[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  attachmentDescriptions[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  attachmentDescriptions[0].samples = VK_SAMPLE_COUNT_1_BIT;
  attachmentDescriptions[0].flags = 0;

  attachmentDescriptions[1].format = VK_FORMAT_D24_UNORM_S8_UINT;
  attachmentDescriptions[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  attachmentDescriptions[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
  attachmentDescriptions[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
  attachmentDescriptions[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  attachmentDescriptions[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  attachmentDescriptions[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  attachmentDescriptions[1].samples = VK_SAMPLE_COUNT_1_BIT;
  attachmentDescriptions[1].flags = 0;   

  VkSubpassDependency dependencies[2];
  dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
  dependencies[0].dstSubpass = 0;
  dependencies[0].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
  dependencies[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
  dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
  dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;  

  dependencies[1].srcSubpass = 0;
  dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
  dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
  dependencies[1].dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
  dependencies[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
  dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;

  VkAttachmentReference attachmentColorRef = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
  VkAttachmentReference attachmentDepthRef = { 1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL };

  VkSubpassDescription subpass = { };
  subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  subpass.colorAttachmentCount = 1;
  subpass.pColorAttachments = &attachmentColorRef;
  subpass.pDepthStencilAttachment = &attachmentDepthRef;

  VkRenderPassCreateInfo renderpassCI = { };
  renderpassCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
  renderpassCI.attachmentCount = 2;
  renderpassCI.pAttachments = attachmentDescriptions;
  renderpassCI.subpassCount = 1;
  renderpassCI.pSubpasses = &subpass;
  renderpassCI.dependencyCount = 2;
  renderpassCI.pDependencies = dependencies;


  VkImageView attachments[2];
  attachments[0] = pbrColor->View();
  attachments[1] = pbrDepth->View();

  VkFramebufferCreateInfo framebufferCI = {};
  framebufferCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
  framebufferCI.height = mWindowHandle->Height();
  framebufferCI.width = mWindowHandle->Width();
  framebufferCI.renderPass = nullptr; // The finalize call handles this for us.
  framebufferCI.layers = 1;
  framebufferCI.attachmentCount = 2;
  framebufferCI.pAttachments = attachments;

  pbrFrameBuffer->Finalize(framebufferCI, renderpassCI);

  // No need to render any depth, as we are only writing on a 2d surface.
  subpass.pDepthStencilAttachment = nullptr;
  attachments[0] = gResources().GetRenderTexture("HDRGammaTexture")->View();
  attachments[1] = nullptr;
  framebufferCI.attachmentCount = 1;
  attachmentDescriptions[0].format = VK_FORMAT_R8G8B8A8_UNORM;
  renderpassCI.attachmentCount = 1;

  hdrFrameBuffer->Finalize(framebufferCI, renderpassCI);
}

And finally, looking at how textures are initialized:

// Creates the Vulkan sampler described by `info` and stores the resulting
// handle in mSampler. Failure is only reported through R_DEBUG; nothing is
// returned to the caller.
void Sampler::Initialize(VkSamplerCreateInfo& info)
{
  const bool created = (vkCreateSampler(mOwner, &info, nullptr, &mSampler) == VK_SUCCESS);
  if (created) {
    return;
  }

  R_DEBUG("ERROR: Sampler failed to initialize!\n");
}


// Destroys the sampler, if one is held, and resets the handle so that calling
// CleanUp() again does nothing.
void Sampler::CleanUp()
{
  if (!mSampler) {
    return; // Nothing to release.
  }

  vkDestroySampler(mOwner, mSampler, nullptr);
  mSampler = VK_NULL_HANDLE;
}


void Texture::Initialize(const VkImageCreateInfo& imageInfo, 
  VkImageViewCreateInfo& viewInfo, b8 stream) // Ignore "stream" as it doesnt do anything yet...
{
  if (vkCreateImage(mOwner, &imageInfo, nullptr, &mImage) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to create image!\n");
    return;
  }

  VkMemoryRequirements memoryRequirements;
  vkGetImageMemoryRequirements(mOwner, mImage, &memoryRequirements);
  VkMemoryAllocateInfo allocInfo = { };
  allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  allocInfo.allocationSize = memoryRequirements.size;
  allocInfo.memoryTypeIndex = VulkanRHI::gPhysicalDevice.FindMemoryType(memoryRequirements.memoryTypeBits, 
    VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

  if (vkAllocateMemory(mOwner, &allocInfo, nullptr, &mMemory) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to allocate host memory for image!\n");
    return;
  }

  if (vkBindImageMemory(mOwner, mImage, mMemory, 0) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to bind memory to image!\n");
    return;
  }

  viewInfo.image = mImage;
  if (vkCreateImageView(mOwner, &viewInfo, nullptr, &mView) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to create image view!\n");
  }
}


// Releases whichever of the image, image view and memory are currently held,
// in that order, and resets each handle so a repeated call does nothing.
void Texture::CleanUp()
{
  if (mImage != VK_NULL_HANDLE) {
    vkDestroyImage(mOwner, mImage, nullptr);
    mImage = VK_NULL_HANDLE;
  }

  // NOTE(review): the view is destroyed after the image it refers to.
  if (mView != VK_NULL_HANDLE) {
    vkDestroyImageView(mOwner, mView, nullptr);
    mView = VK_NULL_HANDLE;
  }

  if (mMemory != VK_NULL_HANDLE) {
    vkFreeMemory(mOwner, mMemory, nullptr);
    mMemory = VK_NULL_HANDLE;
  }
}


// Uploads the pixels of `image` into this texture's VkImage.
//
// The pixels are copied into a host-visible staging buffer, then a one-time
// command buffer transitions the image to TRANSFER_DST, copies the buffer into
// mip level 0 / array layer 0, and transitions the image to
// SHADER_READ_ONLY_OPTIMAL. The call blocks until the graphics queue is idle.
//
//   rhi   - provides the graphics command pool and queue submission.
//   image - source pixels; the size is computed as Width * Height * 4 bytes.
//           NOTE(review): assumes 4 bytes per pixel (e.g. RGBA8) -- confirm
//           against Recluse::Image.
//
// If the staging buffer cannot be mapped, the error is logged, the staging
// buffer is released and nothing is uploaded.
void Texture::Upload(VulkanRHI* rhi, Recluse::Image const& image)
{
  // Widen before multiplying so the byte count is computed in VkDeviceSize
  // rather than in the (possibly narrower) type returned by Width()/Height().
  const VkDeviceSize imageSize =
    static_cast<VkDeviceSize>(image.Width()) * static_cast<VkDeviceSize>(image.Height()) * 4;

  Buffer stagingBuffer;
  stagingBuffer.SetOwner(mOwner);

  VkBufferCreateInfo stagingCI = { };
  stagingCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  stagingCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
  stagingCI.size = imageSize;
  stagingCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

  stagingBuffer.Initialize(stagingCI, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

  // Do not copy into the mapping unless the map call actually succeeded.
  VkResult result = stagingBuffer.Map();
  if (result != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to map staging buffer for texture upload!\n");
    stagingBuffer.CleanUp();
    return;
  }
    memcpy(stagingBuffer.Mapped(), image.Data(), imageSize);
  stagingBuffer.UnMap();

  CommandBuffer buffer;
  buffer.SetOwner(mOwner);
  buffer.Allocate(rhi->GraphicsCmdPool(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);

  VkCommandBufferBeginInfo beginInfo = { };
  beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

  // Record: layout transition -> buffer-to-image copy -> layout transition.
  buffer.Begin(beginInfo);
    VkImageMemoryBarrier imgBarrier = { };
    imgBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    imgBarrier.image = mImage;
    imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    imgBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    imgBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    imgBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    imgBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    imgBarrier.srcAccessMask = 0;
    imgBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    imgBarrier.subresourceRange.baseArrayLayer = 0;
    imgBarrier.subresourceRange.baseMipLevel = 0;
    imgBarrier.subresourceRange.layerCount = 1;
    imgBarrier.subresourceRange.levelCount = 1;

    // Undefined -> transfer destination, before the copy writes to the image.
    buffer.PipelineBarrier(
      VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 
      0,  
      0, nullptr, 
      0, nullptr, 
      1, &imgBarrier
    );

    // Tightly packed source (row length / image height of 0) covering the
    // whole image at mip 0, layer 0.
    VkBufferImageCopy region = { };
    region.bufferOffset = 0;
    region.bufferImageHeight = 0;
    region.bufferRowLength = 0;
    region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    region.imageSubresource.baseArrayLayer = 0;
    region.imageSubresource.layerCount = 1;
    region.imageSubresource.mipLevel = 0;
    region.imageExtent.width = image.Width();
    region.imageExtent.height = image.Height();
    region.imageExtent.depth = 1;
    region.imageOffset = { 0, 0, 0 };

    // Send buffer image copy cmd.
    buffer.CopyBufferToImage(stagingBuffer.Handle(), mImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL , 1, &region);

    // Transfer destination -> shader read-only, so fragment shaders can sample it.
    imgBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    imgBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    imgBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    imgBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

    buffer.PipelineBarrier(
      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
      0,
      0, nullptr,
      0, nullptr,
      1, &imgBarrier
    );

  buffer.End();

  // Submit to the graphics queue and wait for it to go idle, so the command
  // buffer and staging buffer can be released right away.
  VkCommandBuffer commandbuffers[] = { buffer.Handle() };

  VkSubmitInfo submit = { };
  submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit.commandBufferCount = 1;
  submit.pCommandBuffers = commandbuffers;

  rhi->GraphicsSubmit(submit);
  rhi->GraphicsWaitIdle();

  buffer.Free();
  stagingBuffer.CleanUp();
}

I apologize for the humongous amount of code and information, and for the many poor design decisions in it, but my question is: is this a driver bug, or am I doing something incorrect in my code that prevents the final output from being displayed with proper texture mapping? Also, I am not very knowledgeable on the subject, so I apologize again if my explanation doesn't make sense.

Using RenderDoc I also obtained information about each renderpass in the rendering pipeline for both nvidia and intel gpus:

Intel 6th Gen
Nvidia GTX870M

You can see the source code of the engine on my GitHub to get a bigger picture of the renderer, which is inside the engine directory, and its implementation at this link.

If you are looking for a simple "works right out of the box" version of the compiled program, you can download this link here: Recluse Zip

From there, unzip and find the Recluse.sln file in Build64 directory, then compile and set "Test" as the start up project. Or you can find the exe in the Release/Debug output directories.

1
Are validation layers displaying anything during application execution? I can try to compile, execute and check Your code both on Nvidia and Intel hardware but I will have time in the evening (my time) so in about 10-12 hours. - Ekzuzy
Do You have a compiled version of Your project? Or even better - a project that has all the required files and dependencies already included? I cannot create and compile a solution due to the lack of required SDKs (I'd rather not install unnecessary software). - Ekzuzy
Hey @Ekzuzy, yes I have the compiled project and the required files needed for it. I have to apologize, as I forgot to list how to set up the project and the dependencies for the final exe; I'll do so when I get off work. The project can also be compiled without the physx and wwise sdks (since those are for separate modules) by editing the CMakeList.txt in the root dir as well as in the Engine dir. My bad as well, since I wasn't expecting this heh... I'll update the question with the exe as soon as I get off work (around 4hrs my time). Also, thank you for looking into this as well! - wubw
Oh @Ekzuzy, to answer your question about the validation layers issue. It doesnt seem to show any warnings or anything when enabled, which was strange for me as well since I ran this with validation layers. Both intel and nvidia gpus reported no errors ;_;. The way they are enabled is visible in RHI/VulkanContext file inside the Engine/Renderer directory. A function from VulkanContext enables validation layers, which is called by VulkanRHI on Initialize. - wubw
Today in the evening I will look at the version You provided in the zip archive. - Ekzuzy

1 Answers

1
votes

Ok, I'm a moron. The problem wasn't in the implementation of the renderer, but in the shader. Looking inside the HDRGamma.frag shader ( the shader that the HDR Pass uses), in Shader/Source directory:

#version 430
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable

// Color written by this fragment shader.
layout (location = 0) out vec4 fragColor;

// Per-fragment inputs received from the previous shader stage.
in FRAG_IN {
  vec2 position;
  vec2 uv;
} frag_in;


// Sampled inputs at set 0, bindings 0 and 1.
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;

// HDR parameters, declared with explicit padding arrays. This is the original
// declaration that produced the wrong output on the Nvidia GPU.
// NOTE(review): if this block is laid out with std140 rules (presumably the
// default for uniform blocks when GLSL is compiled for Vulkan -- confirm),
// every element of `pad` and `pad1` is padded to a 16-byte stride. That would
// place bloomEnabled at byte offset 48 rather than 16 and make the block much
// larger than 32 bytes -- verify against the CPU-side struct.
layout (set = 0, binding = 2) uniform HDR {
  float gamma;
  float exposure;
  float pad[2];
  int   bloomEnabled;
  int   pad1[3];
} hdr;

You can see that the uniform block HDR is padded out to 4 floats and 4 ints, which I thought would align the buffer, but this is not the case: bloomEnabled is not read correctly in the shader. I'm still not sure why this works on the Intel GPU, but on Nvidia this layout is not correct. If we pack our buffer like so:

// Sampled inputs at set 0, bindings 0 and 1.
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;

// HDR parameters packed as four 4-byte scalars (16 bytes in total), with no
// arrays. bloomEnabled directly follows exposure; pad1 only rounds the block
// up to 16 bytes.
layout (set = 0, binding = 2) uniform HDR {
  float gamma;
  float exposure;
  int   bloomEnabled;
  int   pad1;
} hdr;

With this layout the implementation is correct: the GPU still reads 16 bytes, and bloomEnabled is read correctly by the shader. It turns out Nvidia does something interesting if you don't correctly align your buffers, but I'm still not sure why; Intel, on the other hand, seems to have a way of ensuring the integrity of the output color attachment.

Either way, everything seems to be working fine now. Updated the source code to see the working implementation.