diff --git a/engine/platform/sdl/vid_sdl.c b/engine/platform/sdl/vid_sdl.c index 3fa71436..a27eceae 100644 --- a/engine/platform/sdl/vid_sdl.c +++ b/engine/platform/sdl/vid_sdl.c @@ -487,6 +487,9 @@ GL_UpdateSwapInterval void GL_UpdateSwapInterval( void ) { #if SDL_VERSION_ATLEAST( 2, 0, 0 ) + if (glw_state.context_type != REF_GL) + return; + // disable VSync while level is loading if( cls.state < ca_active ) { diff --git a/ref/vk/NOTES.md b/ref/vk/NOTES.md index 6c77c109..39ca3684 100644 --- a/ref/vk/NOTES.md +++ b/ref/vk/NOTES.md @@ -1,3 +1,9 @@ +# cvars +## `rt_force_disable` +On GPUs that support ray tracing forcefully disables it as if it wasn't supported at all. I.e. no RT extensions and modules are initialized. Useful for testing sometimes. +Note: this cvar is read early in `R_VkInit()`, which gets executed before `autoexec.cfg`, `config.cfg`, etc are read. So putting it there will not work. +`video.cfg` and `vk.cfg` are read before Vk initialization, so this cvar should go there. + # Frame structure wrt calls from the engine - (eng) SCR_UpdateScreen() - (eng) V_PreRender() diff --git a/ref/vk/TODO.md b/ref/vk/TODO.md index 77e5038f..9876cb01 100644 --- a/ref/vk/TODO.md +++ b/ref/vk/TODO.md @@ -1,6 +1,33 @@ ## Next +- [ ] Render graph - [ ] performance profiling and comparison +## 2024-04-12 E374 +- [x] ~~`-vknort` arg to force-disable RT at init time~~ -- reverted on 2024-04-29 + +## 2024-03-21 E372: agonizig over agenda +### Player-visible essentials and blockers. Big projects. +- [ ] Light clusters, sampling, and performance -- 90fps HDR on a Steam Deck +- [ ] Transparency, refractions: glass, water, etc +- [ ] Moar and moar correct bounces +- [ ] Denoiser +- [ ] Decals +- [ ] Volumetrics and fog +- [ ] HDR and tonemapping + +### Invisible blockers -- foundation/systems stuff +- [ ] Render graph and resource tracking -- track textures, buffers+regions ownership and usage, automatic barriers, etc. 
+- [ ] Modules and dependencies tracking +- [ ] Integrate rendertests into CI + +### Small things +- [ ] Material patching refactoring: do not load any patched textures before they are referenced by the engine itself. + Only load patched textures for the textures that are in fact used by something. + +### Nice-to-have +- [ ] Split Vulkan+RT from xash specifics, start preparing it for being a standalone thing. + - [ ] clang-format for it + # Previously ## 2024-02-05 E373 - [x] Skybox for traditional renderer diff --git a/ref/vk/ray_resources.c b/ref/vk/ray_resources.c index 4bd15b70..44cd2fa8 100644 --- a/ref/vk/ray_resources.c +++ b/ref/vk/ray_resources.c @@ -1,8 +1,7 @@ #include "ray_resources.h" #include "vk_core.h" #include "vk_image.h" - -#include "shaders/ray_interop.h" // FIXME temp for type validation +#include "vk_common.h" #include @@ -23,23 +22,28 @@ void R_VkResourcesPrepareDescriptorsValues(VkCommandBuffer cmdbuf, vk_resources_ const qboolean write = i >= args.write_begin; if (res->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + ASSERT(image_barriers_count < COUNTOF(image_barriers)); + if (write) { // No reads are happening - ASSERT(res->read.pipelines == 0); + //ASSERT(res->read.pipelines == 0); - res->write = (ray_resource_state_t) { + src_stage_mask |= res->read.pipelines | res->write.pipelines; + + const ray_resource_state_t new_state = { + .pipelines = args.pipeline, .access_mask = VK_ACCESS_SHADER_WRITE_BIT, .image_layout = VK_IMAGE_LAYOUT_GENERAL, - .pipelines = args.pipeline, }; image_barriers[image_barriers_count++] = (VkImageMemoryBarrier) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .image = src_value->image_object->image, - .srcAccessMask = 0, - .dstAccessMask = res->write.access_mask, + // FIXME MEMORY_WRITE is needed to silence write-after-write layout-transition validation hazard + .srcAccessMask = res->read.access_mask | res->write.access_mask | VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = new_state.access_mask, .oldLayout = 
VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = res->write.image_layout, + .newLayout = new_state.image_layout, .subresourceRange = (VkImageSubresourceRange) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, @@ -49,18 +53,23 @@ void R_VkResourcesPrepareDescriptorsValues(VkCommandBuffer cmdbuf, vk_resources_ }, }; + // Mark that read would need a transition + res->read = (ray_resource_state_t){0}; + res->write = new_state; } else { // Write happened ASSERT(res->write.pipelines != 0); // No barrier was issued if (!(res->read.pipelines & args.pipeline)) { - res->read.access_mask = VK_ACCESS_SHADER_READ_BIT; - res->read.pipelines |= args.pipeline; - res->read.image_layout = VK_IMAGE_LAYOUT_GENERAL; - src_stage_mask |= res->write.pipelines; + res->read = (ray_resource_state_t) { + .pipelines = res->read.pipelines | args.pipeline, + .access_mask = VK_ACCESS_SHADER_READ_BIT, + .image_layout = VK_IMAGE_LAYOUT_GENERAL, + }; + image_barriers[image_barriers_count++] = (VkImageMemoryBarrier) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .image = src_value->image_object->image, diff --git a/ref/vk/vk_common.h b/ref/vk/vk_common.h index bddc10b9..7cfa5e2b 100644 --- a/ref/vk/vk_common.h +++ b/ref/vk/vk_common.h @@ -36,7 +36,7 @@ extern ref_globals_t *gpGlobals; struct { \ TYPE items[MAX_SIZE]; \ int count; \ - } NAME + } NAME = {0} #define BOUNDED_ARRAY_APPEND(var, item) \ do { \ diff --git a/ref/vk/vk_core.c b/ref/vk/vk_core.c index 9495501e..e1c7fcb1 100644 --- a/ref/vk/vk_core.c +++ b/ref/vk/vk_core.c @@ -41,6 +41,8 @@ #include #include +#define LOG_MODULE core + #define NULLINST_FUNCS(X) \ X(vkEnumerateInstanceVersion) \ X(vkCreateInstance) \ @@ -189,7 +191,7 @@ static qboolean createInstance( void ) .pEngineName = "xash3d-fwgs", }; - BOUNDED_ARRAY(validation_features, VkValidationFeatureEnableEXT, 8) = {0}; + BOUNDED_ARRAY(validation_features, VkValidationFeatureEnableEXT, 8); BOUNDED_ARRAY_APPEND(validation_features, 
VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT); BOUNDED_ARRAY_APPEND(validation_features, VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT); @@ -518,8 +520,11 @@ static qboolean createDevice( void ) { is_target_device_found = true; } - if (candidate_device->ray_tracing && !CVAR_TO_BOOL(rt_force_disable)) { - vk_core.rtx = true; + if (candidate_device->ray_tracing) { + const qboolean force_disabled = CVAR_TO_BOOL(rt_force_disable); + if (force_disabled) + WARN("GPU[%d] supports ray tracing, but rt_force_disable is set, force-disabling ray tracing support", i); + vk_core.rtx = !force_disabled; } VkPhysicalDeviceAccelerationStructureFeaturesKHR accel_feature = { diff --git a/ref/vk/vk_framectl.c b/ref/vk/vk_framectl.c index 5ad5c1c1..a7e43176 100644 --- a/ref/vk/vk_framectl.c +++ b/ref/vk/vk_framectl.c @@ -3,7 +3,6 @@ #include "vk_overlay.h" #include "vk_scene.h" #include "vk_render.h" -#include "vk_rtx.h" #include "vk_cvar.h" #include "vk_devmem.h" #include "vk_swapchain.h" @@ -142,12 +141,50 @@ static VkRenderPass createRenderPass( VkFormat depth_format, qboolean ray_tracin .pDepthStencilAttachment = &depth_attachment, }; + BOUNDED_ARRAY(dependencies, VkSubpassDependency, 2); + if (vk_core.rtx) { + const VkSubpassDependency color = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + }; + BOUNDED_ARRAY_APPEND(dependencies, color); + } else { + const VkSubpassDependency color = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .dstStageMask = 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + }; + BOUNDED_ARRAY_APPEND(dependencies, color); + } + + const VkSubpassDependency depth = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + .dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + .srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, + .dependencyFlags = 0, + }; + BOUNDED_ARRAY_APPEND(dependencies, depth); + const VkRenderPassCreateInfo rpci = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = ARRAYSIZE(attachments), .pAttachments = attachments, .subpassCount = 1, .pSubpasses = &subdesc, + .dependencyCount = dependencies.count, + .pDependencies = dependencies.items, }; XVK_CHECK(vkCreateRenderPass(vk_core.device, &rpci, NULL, &render_pass)); @@ -340,8 +377,8 @@ static void submit( vk_combuf_t* combuf, qboolean wait, qboolean draw ) { }; // TODO for RT renderer we only touch framebuffer at the very end of rendering/cmdbuf. // Can we postpone waitinf for framebuffer semaphore until we actually need it. 
- BOUNDED_ARRAY(waitophores, VkSemaphore, 2) = {0}; - BOUNDED_ARRAY(signalphores, VkSemaphore, 2) = {0}; + BOUNDED_ARRAY(waitophores, VkSemaphore, 2); + BOUNDED_ARRAY(signalphores, VkSemaphore, 2); if (draw) { BOUNDED_ARRAY_APPEND(waitophores, frame->sem_framebuffer_ready); diff --git a/ref/vk/vk_logs.h b/ref/vk/vk_logs.h index cfbe51cc..8f427776 100644 --- a/ref/vk/vk_logs.h +++ b/ref/vk/vk_logs.h @@ -3,6 +3,7 @@ #include "vk_common.h" #define LIST_LOG_MODULES(X) \ + X(core) \ X(misc) \ X(tex) \ X(brush) \ diff --git a/ref/vk/vk_ray_internal.h b/ref/vk/vk_ray_internal.h index 85945406..b549d851 100644 --- a/ref/vk/vk_ray_internal.h +++ b/ref/vk/vk_ray_internal.h @@ -41,7 +41,8 @@ struct vk_combuf_s; qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args); #define MAX_SCRATCH_BUFFER (32*1024*1024) -#define MAX_ACCELS_BUFFER (128*1024*1024) +// FIXME compute this by lazily allocating #define MAX_ACCELS_BUFFER (128*1024*1024) +#define MAX_ACCELS_BUFFER (256*1024*1024) typedef struct { // Geometry metadata. Lifetime is similar to geometry lifetime itself. 
diff --git a/ref/vk/vk_render.c b/ref/vk/vk_render.c index c72fa3eb..5ad72af9 100644 --- a/ref/vk/vk_render.c +++ b/ref/vk/vk_render.c @@ -641,24 +641,46 @@ static uint32_t writeDlightsToUBO( void ) return ubo_lights_offset; } +/* +static void debugBarrier( VkCommandBuffer cmdbuf, VkBuffer buf) { + const VkBufferMemoryBarrier bmb[] = { { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + .buffer = buf, + .offset = 0, + .size = VK_WHOLE_SIZE, + } }; + vkCmdPipelineBarrier(cmdbuf, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); +} +*/ + void VK_Render_FIXME_Barrier( VkCommandBuffer cmdbuf ) { const VkBuffer geom_buffer = R_GeometryBuffer_Get(); - // FIXME + //debugBarrier(cmdbuf, geom_buffer); + // FIXME: this should be automatic and dynamically depend on actual usage, resolving this with render graph { const VkBufferMemoryBarrier bmb[] = { { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - //.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, // FIXME - .dstAccessMask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT , // FIXME + .dstAccessMask + = VK_ACCESS_INDEX_READ_BIT + | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | (vk_core.rtx ? ( VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT) : 0), .buffer = geom_buffer, - .offset = 0, // FIXME - .size = VK_WHOLE_SIZE, // FIXME + .offset = 0, + .size = VK_WHOLE_SIZE, } }; vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, - //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | (vk_core.rtx + ? 
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR + | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR + | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT + : 0), 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); } } diff --git a/ref/vk/vk_rtx.c b/ref/vk/vk_rtx.c index 9f1420fd..96a47d37 100644 --- a/ref/vk/vk_rtx.c +++ b/ref/vk/vk_rtx.c @@ -378,7 +378,8 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a if (!res->name[0] || !res->image.image || res->source_index_plus_1 > 0) continue; - res->resource.read = res->resource.write = (ray_resource_state_t){0}; + //res->resource.read = res->resource.write = (ray_resource_state_t){0}; + res->resource.write = (ray_resource_state_t){0}; } DEBUG_BEGIN(cmdbuf, "yay tracing"); diff --git a/ref/vk/vk_staging.c b/ref/vk/vk_staging.c index b87f0f51..4e7ceee0 100644 --- a/ref/vk/vk_staging.c +++ b/ref/vk/vk_staging.c @@ -5,10 +5,12 @@ #include "profiler.h" #include "r_speeds.h" #include "vk_combuf.h" +#include "vk_logs.h" #include #define MODULE_NAME "staging" +#define LOG_MODULE staging #define DEFAULT_STAGING_SIZE (128*1024*1024) #define MAX_STAGING_ALLOCS (2048) @@ -198,6 +200,45 @@ static void commitBuffers(vk_combuf_t *combuf) { // - upload once per buffer // - join adjacent regions + BOUNDED_ARRAY(barriers, VkBufferMemoryBarrier, 4); + + for (int i = 0; i < g_staging.buffers.count; i++) { + const VkBuffer dst_buf = g_staging.buffers.dest[i]; + for (int j = 0;; ++j) { + if (j == COUNTOF(barriers.items)) { + ERR("Ran out of buffer barrier slots, oh no"); + break; + } + + // Insert last + if (j == barriers.count) { + barriers.count++; + barriers.items[j] = (VkBufferMemoryBarrier){ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .buffer = dst_buf, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + break; + } + + // Already inserted + if (barriers.items[j].buffer == dst_buf) + break; + } + 
} + + if (barriers.count) { + vkCmdPipelineBarrier(cmdbuf, + // FIXME this should be more concrete. Will need to pass buffer "state" around. + // For now it works, but makes validation unhappy. + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, NULL, barriers.count, barriers.items, 0, NULL); + } + VkBuffer prev_buffer = VK_NULL_HANDLE; int first_copy = 0; for (int i = 0; i < g_staging.buffers.count; i++) {