Use a staging buffer for increased performance, with a separate command pool for memory transfers

This commit is contained in:
2024-07-08 20:41:12 +02:00
parent 83ff89daf8
commit bf21eae3f1
3 changed files with 119 additions and 45 deletions

View File

@@ -3,7 +3,7 @@ LDFLAGS = -lglfw -lvulkan -ldl -lpthread
CFLAGS = -g -pedantic -Wall -Wextra -Wshadow -Wunused-macros
VulkanApplication: main.c
tcc $(CFLAGS) -o VulkanApplication main.c $(LDFLAGS)
gcc $(CFLAGS) -o VulkanApplication main.c $(LDFLAGS)
.PHONY: run clean

158
main.c
View File

@@ -60,6 +60,7 @@ struct VulkanData {
VkDevice device;
VkQueue graphicsQueue;
VkQueue presentQueue;
VkQueue transferQueue;
VkSurfaceKHR surface;
VkSwapchainKHR swapChain;
VkImage swapChainImages[4];
@@ -72,6 +73,7 @@ struct VulkanData {
VkPipeline graphicsPipeline;
VkFramebuffer swapChainFramebuffers[4];
VkCommandPool commandPool;
VkCommandPool transferCommandPool;
VkCommandBuffer commandBuffers[MAX_FRAMES_IN_FLIGHT];
VkSemaphore imageAvailableSemaphores[MAX_FRAMES_IN_FLIGHT];
VkSemaphore renderFinishedSemaphores[MAX_FRAMES_IN_FLIGHT];
@@ -96,6 +98,7 @@ struct Optional {
struct QueueFamilyIndices {
struct Optional graphicsFamily;
struct Optional presentFamily;
struct Optional transferFamily;
};
static VkVertexInputBindingDescription getBindingDescription() {
@@ -355,6 +358,9 @@ static struct QueueFamilyIndices findQueueFamilies(VkPhysicalDevice physicalDevi
.presentFamily.v = 0,
.presentFamily.is_some = 0,
.transferFamily.v = 0,
.transferFamily.is_some = 0,
};
uint32_t queueFamilyCount = 0;
@@ -364,9 +370,15 @@ static struct QueueFamilyIndices findQueueFamilies(VkPhysicalDevice physicalDevi
for (uint32_t i = 0; i < queueFamilyCount; i++) {
VkQueueFamilyProperties queueFamily = queueFamilies[i];
if (queueFamily.queueFlags & VK_QUEUE_TRANSFER_BIT
&& !(queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
indices.transferFamily.v = i;
indices.transferFamily.is_some = true;
}
if (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
indices.graphicsFamily.v = i;
indices.graphicsFamily.is_some = 1;
indices.graphicsFamily.is_some = true;
}
VkBool32 presentSupport = 0;
@@ -374,10 +386,10 @@ static struct QueueFamilyIndices findQueueFamilies(VkPhysicalDevice physicalDevi
if (presentSupport) {
indices.presentFamily.v = i;
indices.presentFamily.is_some = 1;
indices.presentFamily.is_some = true;
}
if (indices.presentFamily.is_some && indices.graphicsFamily.is_some) {
if (indices.presentFamily.is_some && indices.graphicsFamily.is_some && indices.transferFamily.is_some) {
break;
}
}
@@ -412,7 +424,7 @@ static bool isDeviceSuitable(VkPhysicalDevice physicalDevice, VkSurfaceKHR surfa
deviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
deviceFeatures.geometryShader &&
swapChainAdequate &&
indices.graphicsFamily.is_some && indices.presentFamily.is_some;
indices.graphicsFamily.is_some && indices.presentFamily.is_some && indices.transferFamily.is_some;
}
static void pickPhysicalDevice(struct VulkanData* data) {
@@ -444,10 +456,11 @@ static void createLogicalDevice(struct VulkanData* data) {
struct QueueFamilyIndices indices = findQueueFamilies(data->physicalDevice, data->surface);
float queuePriority = 1.0f;
VkDeviceQueueCreateInfo queueCreateInfos[2];
uint32_t uniqueQueueFamilies[] = {indices.graphicsFamily.v, indices.presentFamily.v};
int queueCount = 3;
VkDeviceQueueCreateInfo queueCreateInfos[queueCount];
uint32_t uniqueQueueFamilies[] = {indices.graphicsFamily.v, indices.transferFamily.v, indices.presentFamily.v};
for (int i = 0; i < 2; i++) {
for (int i = 0; i < queueCount; i++) {
uint32_t queueFamily = uniqueQueueFamilies[i];
VkDeviceQueueCreateInfo queueCreateInfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
@@ -458,12 +471,11 @@ static void createLogicalDevice(struct VulkanData* data) {
queueCreateInfos[i] = queueCreateInfo;
}
int queueCreateInfoCount = 2;
if (indices.graphicsFamily.v == indices.presentFamily.v) {
queueCreateInfoCount = 1;
queueCount--;
}
for (int i = 0; i < queueCreateInfoCount; i++) {
for (int i = 0; i < queueCount; i++) {
uint32_t queueFamily = uniqueQueueFamilies[i];
VkDeviceQueueCreateInfo queueCreateInfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
@@ -486,7 +498,7 @@ static void createLogicalDevice(struct VulkanData* data) {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pEnabledFeatures = &deviceFeatures,
.enabledExtensionCount = DEVICE_EXTENSION_COUNT,
.queueCreateInfoCount = queueCreateInfoCount,
.queueCreateInfoCount = queueCount,
.pQueueCreateInfos = queueCreateInfos,
.ppEnabledExtensionNames = DEVICE_EXTENSIONS,
};
@@ -502,7 +514,7 @@ static void createLogicalDevice(struct VulkanData* data) {
printf("ERROR: Failed to create logical device\n");
exit(1);
}
if (!indices.graphicsFamily.is_some) {
if (!indices.transferFamily.is_some) {
printf("no graphics\n");
}
if (!indices.presentFamily.is_some) {
@@ -510,6 +522,7 @@ static void createLogicalDevice(struct VulkanData* data) {
}
vkGetDeviceQueue(data->device, indices.graphicsFamily.v, 0, &data->graphicsQueue);
vkGetDeviceQueue(data->device, indices.presentFamily.v, 0, &data->presentQueue);
vkGetDeviceQueue(data->device, indices.transferFamily.v, 0, &data->transferQueue);
}
static VkExtent2D chooseSwapExtent(VkSurfaceCapabilitiesKHR* capabilities, GLFWwindow* window) {
@@ -564,6 +577,10 @@ static void createSwapChain(struct VulkanData* data, GLFWwindow* window) {
imageCount = swapChainSupport.capabilities.maxImageCount;
}
struct QueueFamilyIndices indices = findQueueFamilies(data->physicalDevice, data->surface);
uint32_t queueFamilyIndices[] = {indices.graphicsFamily.v, indices.presentFamily.v};
uint32_t otherFamilyIndices[] = {indices.transferFamily.v, indices.graphicsFamily.v};
VkSwapchainCreateInfoKHR createInfo = {
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.surface = data->surface,
@@ -573,26 +590,20 @@ static void createSwapChain(struct VulkanData* data, GLFWwindow* window) {
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.queueFamilyIndexCount = 2,
.imageSharingMode = VK_SHARING_MODE_CONCURRENT,
.pQueueFamilyIndices = otherFamilyIndices,
.preTransform = swapChainSupport.capabilities.currentTransform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.presentMode = presentMode,
.clipped = VK_TRUE,
.oldSwapchain = VK_NULL_HANDLE,
};
struct QueueFamilyIndices indices = findQueueFamilies(data->physicalDevice, data->surface);
uint32_t queueFamilyIndices[] = {indices.graphicsFamily.v, indices.presentFamily.v};
if (indices.graphicsFamily.v != indices.presentFamily.v) {
createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
createInfo.queueFamilyIndexCount = 2;
createInfo.pQueueFamilyIndices = queueFamilyIndices;
} else {
createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
createInfo.queueFamilyIndexCount = 0; // Optional
createInfo.pQueueFamilyIndices = NULL; // Optional
}
createInfo.preTransform = swapChainSupport.capabilities.currentTransform;
createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
createInfo.presentMode = presentMode;
createInfo.clipped = VK_TRUE;
createInfo.oldSwapchain = VK_NULL_HANDLE;
if (vkCreateSwapchainKHR(data->device, &createInfo, NULL, &data->swapChain) != VK_SUCCESS) {
fprintf(stderr, "ERROR: Failed to create swap chain");
@@ -928,10 +939,21 @@ static void createCommandPool(struct VulkanData* data) {
.queueFamilyIndex = queueFamilyIndices.graphicsFamily.v,
};
VkCommandPoolCreateInfo transferPoolInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
.queueFamilyIndex = queueFamilyIndices.transferFamily.v,
};
if (vkCreateCommandPool(data->device, &poolInfo, NULL, &data->commandPool) != VK_SUCCESS) {
fprintf(stderr, "ERROR: failed to create command pool\n");
exit(1);
}
if (vkCreateCommandPool(data->device, &transferPoolInfo, NULL, &data->transferCommandPool) != VK_SUCCESS) {
fprintf(stderr, "ERROR: failed to create transfer command pool\n");
exit(1);
}
}
static void createCommandBuffers(struct VulkanData* data) {
@@ -1105,7 +1127,7 @@ static void createSyncObjects(struct VulkanData* data) {
}
}
uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties, struct VulkanData* data) {
static uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties, struct VulkanData* data) {
VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(data->physicalDevice, &memProperties);
@@ -1118,39 +1140,90 @@ uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties, s
exit(1);
}
static void createVertexBuffer(struct VulkanData* data) {
static void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer* buffer, VkDeviceMemory* bufferMemory, struct VulkanData* data) {
struct QueueFamilyIndices indices = findQueueFamilies(data->physicalDevice, data->surface);
uint32_t otherFamilyIndices[] = {indices.transferFamily.v, indices.graphicsFamily.v};
VkBufferCreateInfo bufferInfo = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = sizeof(vertices[0]) * VERTEX_COUNT,
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.flags = 0,
.size = size,
.usage = usage,
.sharingMode = VK_SHARING_MODE_CONCURRENT,
.queueFamilyIndexCount = 2,
.pQueueFamilyIndices = otherFamilyIndices,
};
if (vkCreateBuffer(data->device, &bufferInfo, NULL, &data->vertexBuffer) != VK_SUCCESS) {
fprintf(stderr, "ERROR: Failed to create vertex buffer\n");
if (vkCreateBuffer(data->device, &bufferInfo, NULL, buffer) != VK_SUCCESS) {
fprintf(stderr, "ERROR: Failed to create buffer\n");
exit(1);
}
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(data->device, data->vertexBuffer, &memRequirements);
vkGetBufferMemoryRequirements(data->device, *buffer, &memRequirements);
VkMemoryAllocateInfo allocInfo = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = memRequirements.size,
.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, data),
.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties, data),
};
if (vkAllocateMemory(data->device, &allocInfo, NULL, &data->vertexBufferMemory) != VK_SUCCESS) {
fprintf(stderr, "ERROR: Failed to allocate vertex buffer memory\n");
if (vkAllocateMemory(data->device, &allocInfo, NULL, bufferMemory) != VK_SUCCESS) {
fprintf(stderr, "ERROR: Failed to allocate buffer memory\n");
exit(1);
}
vkBindBufferMemory(data->device, data->vertexBuffer, data->vertexBufferMemory, 0);
vkBindBufferMemory(data->device, *buffer, *bufferMemory, 0);
}
/*
 * Copy `size` bytes from offset 0 of srcBuffer to offset 0 of dstBuffer.
 *
 * A single primary command buffer is allocated from data->transferCommandPool,
 * recorded with one vkCmdCopyBuffer, submitted to data->transferQueue, and
 * freed again once vkQueueWaitIdle has returned. The call is therefore
 * blocking: the submitted work has finished executing when it returns.
 *
 * Parameters:
 *   srcBuffer  source buffer of the copy
 *   dstBuffer  destination buffer of the copy
 *   size       number of bytes to copy
 *   data       supplies the device, transfer command pool and transfer queue
 *
 * Every Vulkan call that reports a VkResult is checked; on failure an error
 * is printed to stderr and the process exits with status 1.
 */
static void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size, struct VulkanData* data) {
    VkCommandBufferAllocateInfo allocInfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandPool = data->transferCommandPool,
        .commandBufferCount = 1,
    };

    /* Initialised so a failed allocation can never leave a garbage handle. */
    VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
    if (vkAllocateCommandBuffers(data->device, &allocInfo, &commandBuffer) != VK_SUCCESS) {
        fprintf(stderr, "ERROR: Failed to allocate transfer command buffer\n");
        exit(1);
    }

    /* The command buffer is recorded once, submitted once, then freed. */
    VkCommandBufferBeginInfo beginInfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS) {
        fprintf(stderr, "ERROR: Failed to begin recording transfer command buffer\n");
        exit(1);
    }

    VkBufferCopy copyRegion = {
        .srcOffset = 0,
        .dstOffset = 0,
        .size = size,
    };
    vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, &copyRegion);

    if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS) {
        fprintf(stderr, "ERROR: Failed to record transfer command buffer\n");
        exit(1);
    }

    VkSubmitInfo submitInfo = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers = &commandBuffer,
    };
    if (vkQueueSubmit(data->transferQueue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS) {
        fprintf(stderr, "ERROR: Failed to submit transfer command buffer\n");
        exit(1);
    }

    /* No fence is passed to the submit, so wait on the whole queue instead. */
    if (vkQueueWaitIdle(data->transferQueue) != VK_SUCCESS) {
        fprintf(stderr, "ERROR: Failed to wait for transfer queue\n");
        exit(1);
    }

    vkFreeCommandBuffers(data->device, data->transferCommandPool, 1, &commandBuffer);
}
static void createVertexBuffer(struct VulkanData* data) {
VkDeviceSize bufferSize = sizeof(vertices[0]) * VERTEX_COUNT;
VkBuffer stagingBuffer;
VkDeviceMemory stagingBufferMemory;
createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &stagingBuffer, &stagingBufferMemory, data);
void* vertexData;
vkMapMemory(data->device, data->vertexBufferMemory, 0, bufferInfo.size, 0, &vertexData);
memcpy(vertexData, vertices, bufferInfo.size);
vkUnmapMemory(data->device, data->vertexBufferMemory);
vkMapMemory(data->device, stagingBufferMemory, 0, bufferSize, 0, &vertexData);
memcpy(vertexData, vertices, bufferSize);
vkUnmapMemory(data->device, stagingBufferMemory);
createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &data->vertexBuffer, &data->vertexBufferMemory, data);
copyBuffer(stagingBuffer, data->vertexBuffer, bufferSize, data);
vkDestroyBuffer(data->device, stagingBuffer, NULL);
vkFreeMemory(data->device, stagingBufferMemory, NULL);
}
static struct VulkanData initVulkan(GLFWwindow* window) {
@@ -1201,6 +1274,7 @@ static void cleanup(GLFWwindow* window, struct VulkanData* data) {
vkDestroyFence(data->device, data->inFlightFences[i], NULL);
}
vkDestroyCommandPool(data->device, data->commandPool, NULL);
vkDestroyCommandPool(data->device, data->transferCommandPool, NULL);
vkDestroyDevice(data->device, NULL);
if (enableValidationLayers) {
DestroyDebugUtilsMessengerEXT(data->instance, data->debugMessenger, NULL);

View File

@@ -2,7 +2,7 @@
layout(location = 0) in vec3 fragColor;
layout(location = 1) out vec4 outColor;
layout(location = 0) out vec4 outColor;
void main() {
outColor = vec4(fragColor, 1.0);