I have some generated SPIR-V code which I want to use with the vulkan API. But I get an
Exception thrown at 0x00007FFB68D933CB (nvoglv64.dll) in vulkanCompute.exe: 0xC0000005: Access violation reading location 0x0000000000000008.
when trying to create the pipline with vkCreateComputePipelines
.
The API calls should be fine, because the same code works with a shader compiled with glslangValidator. Therefore I assume that the generated SPIR-V code must be illformed somehow.
I've checked the SPIR-V code with the validator tool from khronos, using spirv-val --target-env vulkan1.1 mainV.spv
which exited without error. Anyhow it is also known that this tool is still incomplete.
I've also tried to use the Radeon GPU Analyzer to compile my SPIR-V code, which is also available online at the shader playground and this tool throws the error Error: Error: internal error: Bil::BilInstructionConvert::Create(60) Code Not Tested!
which is not really helpful, but encourages the assumption that the code is malformed.
The SPIR-V code is unfortunately to long to post it here, but it is in the link of the shader playground.
Does anyone know what the problem is with my setting or has any idea how I can verify my SPIR-V code in a better way, without checking all 700 lines of code manually.
I don't thinkt the problem is there, but anyway here is the c++ host code:
#include "vulkan/vulkan.hpp"
#include <iostream>
#include <fstream>
#include <vector>
#define BAIL_ON_BAD_RESULT(result) \
if (VK_SUCCESS != (result)) \
{ \
fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); \
exit(-1); \
}
VkResult vkGetBestComputeQueueNPH(vk::PhysicalDevice &physicalDevice, uint32_t &queueFamilyIndex)
{
auto properties = physicalDevice.getQueueFamilyProperties();
int i = 0;
for (auto prop : properties)
{
vk::QueueFlags maskedFlags = (~(vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding) & prop.queueFlags);
if (!(vk::QueueFlagBits::eGraphics & maskedFlags) && (vk::QueueFlagBits::eCompute & maskedFlags))
{
queueFamilyIndex = i;
return VK_SUCCESS;
}
i++;
}
i = 0;
for (auto prop : properties)
{
vk::QueueFlags maskedFlags = (~(vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding) & prop.queueFlags);
if (vk::QueueFlagBits::eCompute & maskedFlags)
{
queueFamilyIndex = i;
return VK_SUCCESS;
}
i++;
}
return VK_ERROR_INITIALIZATION_FAILED;
}
int main(int argc, const char *const argv[])
{
(void)argc;
(void)argv;
try
{
// initialize the vk::ApplicationInfo structure
vk::ApplicationInfo applicationInfo("VecAdd", 1, "Vulkan.hpp", 1, VK_API_VERSION_1_1);
// initialize the vk::InstanceCreateInfo
std::vector<char *> layers = {
"VK_LAYER_LUNARG_api_dump",
"VK_LAYER_KHRONOS_validation"
};
vk::InstanceCreateInfo instanceCreateInfo({}, &applicationInfo, static_cast<uint32_t>(layers.size()), layers.data());
// create a UniqueInstance
vk::UniqueInstance instance = vk::createInstanceUnique(instanceCreateInfo);
auto physicalDevices = instance->enumeratePhysicalDevices();
for (auto &physicalDevice : physicalDevices)
{
auto props = physicalDevice.getProperties();
// get the QueueFamilyProperties of the first PhysicalDevice
std::vector<vk::QueueFamilyProperties> queueFamilyProperties = physicalDevice.getQueueFamilyProperties();
uint32_t computeQueueFamilyIndex = 0;
// get the best index into queueFamiliyProperties which supports compute and stuff
BAIL_ON_BAD_RESULT(vkGetBestComputeQueueNPH(physicalDevice, computeQueueFamilyIndex));
std::vector<char *>extensions = {"VK_EXT_external_memory_host", "VK_KHR_shader_float16_int8"};
// create a UniqueDevice
float queuePriority = 0.0f;
vk::DeviceQueueCreateInfo deviceQueueCreateInfo(vk::DeviceQueueCreateFlags(), static_cast<uint32_t>(computeQueueFamilyIndex), 1, &queuePriority);
vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceShaderFloat16Int8Features> createDeviceInfo = {
vk::DeviceCreateInfo(vk::DeviceCreateFlags(), 1, &deviceQueueCreateInfo, 0, nullptr, static_cast<uint32_t>(extensions.size()), extensions.data()),
vk::PhysicalDeviceFeatures2(),
vk::PhysicalDeviceShaderFloat16Int8Features()
};
createDeviceInfo.get<vk::PhysicalDeviceFeatures2>().features.setShaderInt64(true);
createDeviceInfo.get<vk::PhysicalDeviceShaderFloat16Int8Features>().setShaderInt8(true);
vk::UniqueDevice device = physicalDevice.createDeviceUnique(createDeviceInfo.get<vk::DeviceCreateInfo>());
auto memoryProperties2 = physicalDevice.getMemoryProperties2();
vk::PhysicalDeviceMemoryProperties const &memoryProperties = memoryProperties2.memoryProperties;
const int32_t bufferLength = 16384;
const uint32_t bufferSize = sizeof(int32_t) * bufferLength;
// we are going to need two buffers from this one memory
const vk::DeviceSize memorySize = bufferSize * 3;
// set memoryTypeIndex to an invalid entry in the properties.memoryTypes array
uint32_t memoryTypeIndex = VK_MAX_MEMORY_TYPES;
for (uint32_t k = 0; k < memoryProperties.memoryTypeCount; k++)
{
if ((vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent) & memoryProperties.memoryTypes[k].propertyFlags &&
(memorySize < memoryProperties.memoryHeaps[memoryProperties.memoryTypes[k].heapIndex].size))
{
memoryTypeIndex = k;
std::cout << "found memory " << memoryTypeIndex + 1 << " out of " << memoryProperties.memoryTypeCount << std::endl;
break;
}
}
BAIL_ON_BAD_RESULT(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS);
auto memory = device->allocateMemoryUnique(vk::MemoryAllocateInfo(memorySize, memoryTypeIndex));
auto in_buffer = device->createBufferUnique(vk::BufferCreateInfo(vk::BufferCreateFlags(), bufferSize, vk::BufferUsageFlagBits::eStorageBuffer, vk::SharingMode::eExclusive));
device->bindBufferMemory(in_buffer.get(), memory.get(), 0);
// create a DescriptorSetLayout
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBinding{
{0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}};
vk::UniqueDescriptorSetLayout descriptorSetLayout = device->createDescriptorSetLayoutUnique(vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), static_cast<uint32_t>(descriptorSetLayoutBinding.size()), descriptorSetLayoutBinding.data()));
std::cout << "Memory bound" << std::endl;
std::ifstream myfile;
myfile.open("shaders/MainV.spv", std::ios::ate | std::ios::binary);
if (!myfile.is_open())
{
std::cout << "File not found" << std::endl;
return EXIT_FAILURE;
}
auto size = myfile.tellg();
std::vector<unsigned int> shader_spv(size / sizeof(unsigned int));
myfile.seekg(0);
myfile.read(reinterpret_cast<char *>(shader_spv.data()), size);
myfile.close();
std::cout << "Shader size: " << shader_spv.size() << std::endl;
auto shaderModule = device->createShaderModuleUnique(vk::ShaderModuleCreateInfo(vk::ShaderModuleCreateFlags(), shader_spv.size() * sizeof(unsigned int), shader_spv.data()));
// create a PipelineLayout using that DescriptorSetLayout
vk::UniquePipelineLayout pipelineLayout = device->createPipelineLayoutUnique(vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), 1, &descriptorSetLayout.get()));
vk::ComputePipelineCreateInfo computePipelineInfo(
vk::PipelineCreateFlags(),
vk::PipelineShaderStageCreateInfo(
vk::PipelineShaderStageCreateFlags(),
vk::ShaderStageFlagBits::eCompute,
shaderModule.get(),
"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE6VecAdd"),
pipelineLayout.get());
auto pipeline = device->createComputePipelineUnique(nullptr, computePipelineInfo);
auto descriptorPoolSize = vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, 2);
auto descriptorPool = device->createDescriptorPool(vk::DescriptorPoolCreateInfo(vk::DescriptorPoolCreateFlags(), 1, 1, &descriptorPoolSize));
auto commandPool = device->createCommandPoolUnique(vk::CommandPoolCreateInfo(vk::CommandPoolCreateFlags(), computeQueueFamilyIndex));
auto commandBuffer = std::move(device->allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool.get(), vk::CommandBufferLevel::ePrimary, 1)).front());
commandBuffer->begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)));
commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, pipeline.get());
commandBuffer->dispatch(bufferSize / sizeof(int32_t), 1, 1);
commandBuffer->end();
auto queue = device->getQueue(computeQueueFamilyIndex, 0);
vk::SubmitInfo submitInfo(0, nullptr, nullptr, 1, &commandBuffer.get(), 0, nullptr);
queue.submit(1, &submitInfo, vk::Fence());
queue.waitIdle();
printf("all done\nWoohooo!!!\n\n");
}
}
catch (vk::SystemError &err)
{
std::cout << "vk::SystemError: " << err.what() << std::endl;
exit(-1);
}
catch (std::runtime_error &err)
{
std::cout << "std::runtime_error: " << err.what() << std::endl;
exit(-1);
}
catch (...)
{
std::cout << "unknown error\n";
exit(-1);
}
return EXIT_SUCCESS;
}
spirv-dis
; I think you don't need to check all 700 lines to verify the basics.... – krOozeVK_KHR_variable_pointers
(or Vulkan 1.1), but I do not see your code checking the version of your instance and physical device. – krOozevariablePointersStorageBuffer
feature that you did not enable. – krOozespirv-dis
? I have looked at the disasambled code if you mean that. But of course not in detail. Yes, you are right, I haven't enabledVK_KHR_variable_pointers
. But shouldn't it be enough to useVK_API_VERSION_1_1
in the application info when creating the instance? Anyhow, I've tried enabling the feature and the additional extension, but still the same problem. I've also checked the device, it uses Vulkan 1.2 and also supports the needed extensions. – tade