2
votes

I've created this GLSL Compute Shader and compiled it using "glslangValidator.exe". However, it will only ever update the "Particles[i].Velocity" values and not any other values and this only happens in some instances. I've checked that the correct input values are sent in using "RenderDoc".

Buffer Usage Flag Bits

VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT

And the Property Flag Bits

VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT

GLSL Shader

#version 450
#extension GL_ARB_separate_shader_objects : enable

// Particle update step. Each invocation advances (or respawns) the particle
// whose index equals gl_GlobalInvocationID.x.

// One invocation per workgroup, stated explicitly instead of relying on the
// compiler's default, so a dispatch of N workgroups covers N particles.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Layout of one array element under std430 (a vec3 is aligned like a vec4):
//   Position  @  0      IVelocity @ 48
//   Velocity  @ 16      LifeTime  @ 60
//   IPosition @ 32      ILifetime @ 64
// The element stride is 80 bytes (68 rounded up to the 16-byte alignment).
// The host-side data written into this buffer must use the same offsets.
struct Particle
{
  vec3 Position;
  vec3 Velocity;
  vec3 IPosition;   // value Position is reset to when the particle expires
  vec3 IVelocity;   // value Velocity is reset to when the particle expires

  float LifeTime;   // remaining life; the particle respawns once this is <= 0
  float ILifetime;  // value LifeTime is reset to when the particle expires
};

// Packing rule spelled out explicitly rather than left to the default.
layout(std430, binding = 0) buffer Source
{
   Particle Particles[ ];
};

layout(std140, binding = 1) uniform UBO
{
  mat4 model;
  mat4 view;
  mat4 proj;
  float time;   // used below as the integration step for this dispatch
};

const vec3 Gravity = vec3(0.0f,-0.98f,0.0f);
const float dampeningFactor = 0.5;

void main(){
  uint i = gl_GlobalInvocationID.x;

  // Guard against dispatches that launch more invocations than the bound
  // buffer range holds particles; out-of-range writes would be undefined.
  if(i >= uint(Particles.length())){
    return;
  }

  if(Particles[i].LifeTime > 0.0f){
    // Alive: integrate velocity first, then position with the new velocity.
    Particles[i].Velocity = Particles[i].Velocity + Gravity * dampeningFactor * time;
    Particles[i].Position = Particles[i].Position + Particles[i].Velocity * time;
    Particles[i].LifeTime = Particles[i].LifeTime - time;
  }else{
    // Expired: restore the particle to its initial state.
    Particles[i].Velocity = Particles[i].IVelocity;
    Particles[i].Position = Particles[i].IPosition;
    Particles[i].LifeTime = Particles[i].ILifetime;
  }
}

Descriptor Set Layout Binding

        // Two compute-stage bindings: a storage buffer at binding 0 and a
        // uniform buffer at binding 1, one descriptor each.
        VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[2] = {
            // binding, descriptorType, descriptorCount, stageFlags, pImmutableSamplers
            { 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr },
            { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr }
        };

The Command Dispatch

// Records a dispatch of MAX_PARTICLES workgroups along X and one along Y and Z.
// The compute shader uses gl_GlobalInvocationID.x as the particle index.
vkCmdDispatch(computeCommandBuffers, MAX_PARTICLES , 1, 1);

The Submitting of the Queue

            // Submit the recorded compute command buffer and block the host
            // until the GPU has finished executing it.
            VkSubmitInfo cSubmitInfo = {};
            cSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
            cSubmitInfo.commandBufferCount = 1;
            cSubmitInfo.pCommandBuffers = &computeCommandBuffers;

            // computeFence is signaled when this submission completes.
            // NOTE(review): a fence must be unsignaled when passed to
            // vkQueueSubmit; confirm it is reset (vkResetFences) elsewhere
            // before this code runs again.
            if (vkQueueSubmit(computeQueue.getQueue(), 1, &cSubmitInfo, computeFence) != VK_SUCCESS) {
                throw std::runtime_error("failed to submit compute command buffer!");
            }

            // The wait result used to be dropped; a failed wait (for example a
            // lost device) is now reported instead of silently continuing.
            if (vkWaitForFences(device.getDevice(), 1, &computeFence, VK_TRUE, UINT64_MAX) != VK_SUCCESS) {
                throw std::runtime_error("failed to wait for compute fence!");
            }

UPDATE: 13/05/2017 (More Information Added)

Particle Struct Definition in CPP

struct Particle {
    glm::vec3 location;
    glm::vec3 velocity;
    glm::vec3 initLocation;
    glm::vec3 initVelocity;

    float lifeTime;
    float initLifetime;
}

Data Mapping to Storage Buffer

            void* data;
            vkMapMemory(device.getDevice(), stagingBufferMemory, 0, bufferSize, 0, &data);
            memcpy(data, particles, (size_t)bufferSize);
            vkUnmapMemory(device.getDevice(), stagingBufferMemory);

            copyBuffer(stagingBuffer, computeBuffer, bufferSize);

Copy Buffer Function (by Alexander Overvoorde from vulkan-tutorial.com)

        // Copies `size` bytes from offset 0 of srcBuffer to offset 0 of dstBuffer
        // using a temporary one-time-submit command buffer on the graphics
        // queue, and blocks until the queue is idle.
        //
        // A zero `size` is a no-op. Throws std::runtime_error if allocating,
        // recording, submitting or waiting fails; the temporary command buffer
        // is freed in every case once it has been allocated.
        void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) {
            // A copy region must have a non-zero size, so there is nothing to record.
            if (size == 0) {
                return;
            }

            VkCommandBufferAllocateInfo allocInfo = {};
            allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
            allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
            allocInfo.commandPool = commandPool.getCommandPool();
            allocInfo.commandBufferCount = 1;

            VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
            if (vkAllocateCommandBuffers(device.getDevice(), &allocInfo, &commandBuffer) != VK_SUCCESS) {
                throw std::runtime_error("failed to allocate copy command buffer!");
            }

            VkCommandBufferBeginInfo beginInfo = {};
            beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
            beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

            // Each step runs only if the previous one succeeded, so the command
            // buffer can be freed on a single path below before reporting failure.
            VkResult result = vkBeginCommandBuffer(commandBuffer, &beginInfo);

            if (result == VK_SUCCESS) {
                VkBufferCopy copyRegion = {};  // srcOffset and dstOffset stay 0
                copyRegion.size = size;
                vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, &copyRegion);

                result = vkEndCommandBuffer(commandBuffer);
            }

            if (result == VK_SUCCESS) {
                VkSubmitInfo submitInfo = {};
                submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
                submitInfo.commandBufferCount = 1;
                submitInfo.pCommandBuffers = &commandBuffer;

                result = vkQueueSubmit(graphicsQueue.getQueue(), 1, &submitInfo, VK_NULL_HANDLE);
            }

            if (result == VK_SUCCESS) {
                // Wait for the copy to finish before the command buffer is freed.
                result = vkQueueWaitIdle(graphicsQueue.getQueue());
            }

            vkFreeCommandBuffers(device.getDevice(), commandPool.getCommandPool(), 1, &commandBuffer);

            if (result != VK_SUCCESS) {
                throw std::runtime_error("failed to copy buffer!");
            }
        }
1
How do You know members are not updated? Do You align storage buffer members in memory?Ekzuzy
The Storage Buffer is used as vertex input for the vertex shader, and I can see that only some of the particles move. Furthermore, I've checked and compared the same values from different frames using the aforementioned Graphics Debugger (RenderDoc).Hasan Al-Baghdadi
And for the second question, sorry, I don't know what aligning memory means. Is it the same as allocating memory and binding the object to it? – Hasan Al-Baghdadi
Storage and uniform buffer members must be placed at appropriate offsets (from the beginning of the buffer's memory), which depend on the member's data type (or, more specifically, its size). For example, if a scalar member's size is N bytes (a float has 4), then it must be placed at a memory offset that is a multiple of N. BUT if a member is a vector with 3 elements, it must be placed at an offset that is a multiple of 4N. So Your Position member may start at offset 0. But Velocity cannot be placed right after Position; it must be placed at byte offset 16 (4 * 4 bytes, i.e. 128 bits = 4 * 32 bits). – Ekzuzy

1 Answers

2
votes

Have a look at this StackOverflow question:

Memory allocation with std430 qualifier

FINAL, CORRECTED ANSWER:

In Your case the biggest member of Your structure is vec3 (3-element vector of floats). Base alignment of vec3 is the same as alignment of vec4. So the base alignment of Your array's elements is equal to 16 bytes. This means that each element of Your array has to start at an address that is a multiple of 16.

But alignment rules have to be applied for each structure member recursively. 3-element vectors have the same alignment as 4-element vectors. This means that:

  • Position member starts at the same alignment as each array member
  • Velocity, IPosition and IVelocity members must start at multiples of 16 bytes after the beginning of a given array element.
  • LifeTime and ILifeTime members have a 4-bytes alignment.

So the total size of Your struct in bytes is equal to:

  • Position - 16 bytes (Position itself takes 12 bytes, but next member has a 16-byte alignment)
  • Velocity - 16 bytes
  • IPosition - 16 bytes
  • IVelocity + LifeTime - 16 bytes
  • ILifeTime - 4 bytes

which gives 68 bytes. So, as far as I understand it, You need a 12-byte padding at the end of Your structure (additional 12 bytes between array elements) because each array element must start at addresses which are a multiple of 16.

So the first array element starts at offset 0 of the memory bound to the storage buffer. But the second array element must start at offset 80 from the beginning of the memory (nearest multiple of 16 greater than 68) and so on.

Or, as @NicolBolas commented, to make life easier, pack everything in vec4 members only ;-).

BETTER THOUGH NOT FULLY CORRECT ANSWER:

In Your case the biggest member of Your structure is vec3 (3-element vector of floats). So the base alignment of Your array's elements is equal to 12 bytes (in case of arrays of structs in std430 layout, the base alignment doesn't have to be rounded up to match the alignment of 4-element vectors. <- Here I was wrong. We don't have to round up the structure's base alignment, but the alignment of its members is calculated normally, with vec3 alignment being the same as vec4 alignment). This means that each element of Your array has to start at an address that is a multiple of 12 (no, in this case it should start at a multiple of 16).

But alignment rules have to be applied for each structure member recursively. 3-element vectors have the same alignment as 4-element vectors. This means that:

  • Position member starts at the same alignment as each array member
  • Velocity, IPosition and IVelocity members must start at multiples of 16 bytes after the beginning of a given array element.
  • LifeTime and ILifeTime members have a 4-bytes alignment.

So the total size of Your struct in bytes is equal to:

  • Position - 16 bytes (Position itself takes 12 bytes, but next member has a 16-byte alignment)
  • Velocity - 16 bytes
  • IPosition - 16 bytes
  • IVelocity + LifeTime - 16 bytes
  • ILifeTime - 4 bytes

which gives 68 bytes. So, as far as I understand it, You need a 4-byte padding at the end of Your structure (additional 4 bytes between array elements) because each array element must start at addresses which are a multiple of 12 (again, we need 12-byte padding here so the next array elements starts at a multiple of 16, not 12).

So the first array element starts at offset 0 of the memory bound to the storage buffer. But the second array element must start at offset 72 from the beginning of the memory (nearest multiple of 12 greater than 68) and so on.

PREVIOUS, WRONG ANSWER:

In Your case the biggest member is vec3 (3-element vector of floats). Its alignment is equal to 12 bytes (in case of arrays of structs we don't have to round the alignment of 3-element vectors to match the alignment of 4-element vectors). The size of Your struct in bytes equals 56. So, as far as I understand it, You need a 4-byte padding at the end of Your structure (additional 4 bytes between array elements) because each array element must start at addresses which are a multiple of 12.