From 14ab0662df5a6b6cd8e7b4f35364e24154ef47a6 Mon Sep 17 00:00:00 2001 From: Ivan Avdeev Date: Wed, 1 May 2024 16:02:55 -0400 Subject: [PATCH] vk: issue a barrier for staging buffer uploads Trad rendering still works with this slightly more tight sync model. It is suboptimal as it doesn't really know the previous op on the buffer, so it has to do the ALL_COMMANDS stages, which makes validation a bit sore. --- ref/vk/vk_render.c | 20 +++++++++++--------- ref/vk/vk_staging.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/ref/vk/vk_render.c b/ref/vk/vk_render.c index 8823c9ea..dc2552fe 100644 --- a/ref/vk/vk_render.c +++ b/ref/vk/vk_render.c @@ -641,6 +641,7 @@ static uint32_t writeDlightsToUBO( void ) return ubo_lights_offset; } +/* static void debugBarrier( VkCommandBuffer cmdbuf, VkBuffer buf) { const VkBufferMemoryBarrier bmb[] = { { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, @@ -655,26 +656,27 @@ static void debugBarrier( VkCommandBuffer cmdbuf, VkBuffer buf) { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); } +*/ void VK_Render_FIXME_Barrier( VkCommandBuffer cmdbuf ) { const VkBuffer geom_buffer = R_GeometryBuffer_Get(); - debugBarrier(cmdbuf, geom_buffer); - // FIXME + //debugBarrier(cmdbuf, geom_buffer); + // FIXME: this should be automatic and dynamically depend on actual usage, resolving this with render graph { const VkBufferMemoryBarrier bmb[] = { { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - //.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, // FIXME - .dstAccessMask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT , // FIXME + .dstAccessMask + = VK_ACCESS_INDEX_READ_BIT + | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | (vk_core.rtx ? 
( VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT) : 0), .buffer = geom_buffer, - .offset = 0, // FIXME - .size = VK_WHOLE_SIZE, // FIXME + .offset = 0, + .size = VK_WHOLE_SIZE, } }; vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, - //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | (vk_core.rtx ? VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR : 0), 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); } } diff --git a/ref/vk/vk_staging.c b/ref/vk/vk_staging.c index b87f0f51..4e7ceee0 100644 --- a/ref/vk/vk_staging.c +++ b/ref/vk/vk_staging.c @@ -5,10 +5,12 @@ #include "profiler.h" #include "r_speeds.h" #include "vk_combuf.h" +#include "vk_logs.h" #include #define MODULE_NAME "staging" +#define LOG_MODULE staging #define DEFAULT_STAGING_SIZE (128*1024*1024) #define MAX_STAGING_ALLOCS (2048) @@ -198,6 +200,45 @@ static void commitBuffers(vk_combuf_t *combuf) { // - upload once per buffer // - join adjacent regions + BOUNDED_ARRAY(barriers, VkBufferMemoryBarrier, 4); + + for (int i = 0; i < g_staging.buffers.count; i++) { + const VkBuffer dst_buf = g_staging.buffers.dest[i]; + for (int j = 0;; ++j) { + if (j == COUNTOF(barriers.items)) { + ERR("Ran out of buffer barrier slots, oh no"); + break; + } + + // Insert last + if (j == barriers.count) { + barriers.count++; + barriers.items[j] = (VkBufferMemoryBarrier){ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .buffer = dst_buf, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + break; + } + + // Already inserted + if (barriers.items[j].buffer == dst_buf) + break; + } + } + + if (barriers.count) { + 
vkCmdPipelineBarrier(cmdbuf, + // FIXME this should be more concrete. Will need to pass buffer "state" around. + // For now it works, but makes validation unhappy. + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, NULL, barriers.count, barriers.items, 0, NULL); + } + VkBuffer prev_buffer = VK_NULL_HANDLE; int first_copy = 0; for (int i = 0; i < g_staging.buffers.count; i++) {