diff --git a/ref_vk/shaders/rtx.comp b/ref_vk/shaders/rtx.comp index 1857b32e..a54c232b 100644 --- a/ref_vk/shaders/rtx.comp +++ b/ref_vk/shaders/rtx.comp @@ -6,6 +6,9 @@ layout(local_size_x = 16, local_size_y = 8, local_size_z = 1) in; layout(binding = 0, set = 0, rgba8) uniform image2D image; layout(binding = 1, set = 0) uniform accelerationStructureEXT tlas; +layout(binding = 2, set = 0) uniform UBO { + mat4 inv_proj, inv_view; +} ubo; //layout(binding = 2, set = 0) buffer Vertices { vec4 vertices[]; }; @@ -16,7 +19,26 @@ layout (push_constant) uniform PC { void main() { vec2 res = imageSize(image); - vec2 uv = gl_GlobalInvocationID.xy / res * .5 - .5; + vec2 uv = (gl_GlobalInvocationID.xy + .5) / res * 2. - 1.; - imageStore(image, ivec2(gl_GlobalInvocationID.xy), vec4(res, 0., 1.)); + vec4 origin = ubo.inv_view * vec4(0, 0, 0, 1); + vec4 target = ubo.inv_proj * vec4(uv.x, uv.y, 1, 1); + vec4 direction = ubo.inv_view * vec4(normalize(target.xyz), 0); + + vec3 C = vec3(0.); + vec3 O = origin.xyz, D=direction.xyz; + + const float L = 10000.; + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, tlas, gl_RayFlagsOpaqueEXT, 0xff, O, 0., D, L); + while(rayQueryProceedEXT(rayQuery)) {} + const float l = rayQueryGetIntersectionTEXT(rayQuery, true); + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionTriangleEXT) { + C = vec3(1., 0., 1.); + } else { + vec3 pos = O+D*l; + C = fract(pos); + } + + imageStore(image, ivec2(gl_GlobalInvocationID.xy), vec4(C, 1.)); } diff --git a/ref_vk/vk_global.h b/ref_vk/vk_global.h index aaaa3714..7b65b0bd 100644 --- a/ref_vk/vk_global.h +++ b/ref_vk/vk_global.h @@ -20,6 +20,8 @@ typedef struct vk_global_camera_s { matrix4x4 projectionMatrix; matrix4x4 worldviewProjectionMatrix; // worldviewMatrix * projectionMatrix + + matrix4x4 projectionMatrixVk; } vk_global_camera_t; extern vk_global_camera_t g_camera; diff --git a/ref_vk/vk_render.c b/ref_vk/vk_render.c index d0cd97e6..6bc2ec0d 100644 --- a/ref_vk/vk_render.c +++ b/ref_vk/vk_render.c @@ -44,7 +44,7 @@ static struct { uint32_t buffer_frame_begin_offset; vk_buffer_t uniform_buffer; - uint32_t uniform_unit_size; + uint32_t ubo_align; struct { int align_holes_size; @@ -243,16 +243,17 @@ qboolean VK_RenderInit( void ) // TODO Better estimates const uint32_t vertex_buffer_size = MAX_BUFFER_VERTICES * sizeof(float) * (3 + 3 + 2 + 2); const uint32_t index_buffer_size = MAX_BUFFER_INDICES * sizeof(uint16_t); - const uint32_t ubo_align = Q_max(4, vk_core.physical_device.properties.limits.minUniformBufferOffsetAlignment); + uint32_t uniform_unit_size; - g_render.uniform_unit_size = ((sizeof(uniform_data_t) + ubo_align - 1) / ubo_align) * ubo_align; + g_render.ubo_align = Q_max(4, vk_core.physical_device.properties.limits.minUniformBufferOffsetAlignment); + uniform_unit_size = ((sizeof(uniform_data_t) + g_render.ubo_align - 1) / g_render.ubo_align) * g_render.ubo_align; // TODO device memory and friends (e.g. handle mobile memory ...) if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (vk_core.rtx ? VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : 0), VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) return false; - if (!createBuffer(&g_render.uniform_buffer, g_render.uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + if (!createBuffer(&g_render.uniform_buffer, uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) return false; { @@ -415,12 +416,19 @@ typedef struct { static struct { int uniform_data_set_mask; - int next_free_uniform_slot; uniform_data_t current_uniform_data; uniform_data_t dirty_uniform_data; + uint32_t current_ubo_offset; + uint32_t uniform_free_offset; + draw_command_t draw_commands[MAX_DRAW_COMMANDS]; int num_draw_commands; + + // FIXME vk_rtx-specific + struct { + matrix4x4 proj_inv, view_inv; + } rtx; } g_render_state; enum { @@ -432,8 +440,9 @@ enum { }; void VK_RenderBegin( void ) { - g_render_state.next_free_uniform_slot = 0; + g_render_state.uniform_free_offset = 0; g_render_state.uniform_data_set_mask = UNIFORM_UNSET; + g_render_state.current_ubo_offset = UINT32_MAX; memset(&g_render_state.current_uniform_data, 0, sizeof(g_render_state.current_uniform_data)); memset(&g_render_state.dirty_uniform_data, 0, sizeof(g_render_state.dirty_uniform_data)); @@ -456,23 +465,33 @@ void VK_RenderStateSetMatrix( const matrix4x4 mvp ) Matrix4x4_ToArrayFloatGL( mvp, (float*)g_render_state.dirty_uniform_data.mvp ); } -static uniform_data_t *getUniformSlot(int index) +void VK_RenderStateSetProjectionMatrix(const matrix4x4 proj) { - ASSERT(index >= 0); - ASSERT(index < MAX_UNIFORM_SLOTS); - return (uniform_data_t*)(((uint8_t*)g_render.uniform_buffer.mapped) + (g_render.uniform_unit_size * index)); + matrix4x4 tmp; + Matrix4x4_Invert_Full(tmp, proj); + Matrix4x4_ToArrayFloatGL( tmp, g_render_state.rtx.proj_inv); } -static int allocUniformSlot( void ) { - if (g_render_state.next_free_uniform_slot == MAX_UNIFORM_SLOTS) - return -1; +void VK_RenderStateSetViewMatrix(const matrix4x4 view) +{ + matrix4x4 tmp; + Matrix4x4_Invert_Full(tmp, view); + Matrix4x4_ToArrayFloatGL( tmp, g_render_state.rtx.view_inv); +} - return g_render_state.next_free_uniform_slot++; +static uint32_t allocUniform( uint32_t size, uint32_t alignment ) { + // FIXME Q_max is not correct, we need NAIMENSCHEEE OBSCHEEE KRATNOE + const uint32_t align = Q_max(alignment, g_render.ubo_align); + const uint32_t offset = (((g_render_state.uniform_free_offset + align - 1) / align) * align); + if (offset + size > g_render.uniform_buffer.size) + return UINT32_MAX; + + g_render_state.uniform_free_offset = offset + size; + return offset; } void VK_RenderScheduleDraw( const render_draw_t *draw ) { - int ubo_index = g_render_state.next_free_uniform_slot - 1; const vk_buffer_alloc_t *vertex_buffer = NULL, *index_buffer = NULL; draw_command_t *draw_command; @@ -507,15 +526,15 @@ void VK_RenderScheduleDraw( const render_draw_t *draw ) // Figure out whether we need to update UBO data, and upload new data if we do // TODO generally it's not safe to do memcmp for structures comparison - if (((g_render_state.uniform_data_set_mask & UNIFORM_UPLOADED) == 0) || memcmp(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.current_uniform_data)) != 0) { + if (g_render_state.current_ubo_offset == UINT32_MAX || ((g_render_state.uniform_data_set_mask & UNIFORM_UPLOADED) == 0) || memcmp(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.current_uniform_data)) != 0) { uniform_data_t *ubo; - ubo_index = allocUniformSlot(); - if (ubo_index < 0) { + g_render_state.current_ubo_offset = allocUniform( sizeof(uniform_data_t), 16 ); + if (g_render_state.current_ubo_offset == UINT32_MAX) { gEngine.Con_Printf( S_ERROR "Ran out of uniform slots\n" ); return; } - ubo = getUniformSlot( ubo_index ); + ubo = (uniform_data_t*)((byte*)g_render.uniform_buffer.mapped + g_render_state.current_ubo_offset); memcpy(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.dirty_uniform_data)); memcpy(ubo, &g_render_state.current_uniform_data, sizeof(*ubo)); g_render_state.uniform_data_set_mask |= UNIFORM_UPLOADED; @@ -523,7 +542,7 @@ void VK_RenderScheduleDraw( const render_draw_t *draw ) draw_command = g_render_state.draw_commands + (g_render_state.num_draw_commands++); draw_command->draw = *draw; - draw_command->ubo_offset = g_render.uniform_unit_size * ubo_index; + draw_command->ubo_offset = g_render_state.current_ubo_offset; } void VK_RenderEnd( VkCommandBuffer cmdbuf ) @@ -625,5 +644,33 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage VK_RayScenePushModel(cmdbuf, &ray_model_args); } - VK_RaySceneEnd( cmdbuf, img_dst_view, img_dst, w, h ); + + { + float *matrices = NULL; + const vk_ray_scene_render_args_t args = { + .cmdbuf = cmdbuf, + .dst = { + .image_view = img_dst_view, + .image = img_dst, + .width = w, + .height = h, + }, + // FIXME this should really be in vk_rtx, calling vk_render(or what?) to alloc slot for it + .ubo = { + .buffer = g_render.uniform_buffer.buffer, + .offset = allocUniform(sizeof(float) * 16 * 2, 16 * sizeof(float)), + .size = sizeof(float) * 16 * 2, + }, + }; + + if (args.ubo.offset == UINT32_MAX) { + gEngine.Con_Printf(S_ERROR "Cannot allocate UBO for RTX\n"); + return; + } + + matrices = (byte*)g_render.uniform_buffer.mapped + args.ubo.offset; + memcpy(matrices, &g_render_state.rtx, sizeof(g_render_state.rtx)); + + VK_RaySceneEnd(&args); + } } diff --git a/ref_vk/vk_render.h b/ref_vk/vk_render.h index 2bcdbc34..2b33a427 100644 --- a/ref_vk/vk_render.h +++ b/ref_vk/vk_render.h @@ -46,6 +46,9 @@ void VK_RenderStateSetColor( float r, float g, float b, float a ); void VK_RenderStateSetMatrix( const matrix4x4 mvp ); // TODO: set projection and mv matrices separately +void VK_RenderStateSetProjectionMatrix(const matrix4x4 proj); +void VK_RenderStateSetViewMatrix(const matrix4x4 view); + // TODO is this a good place? typedef struct vk_vertex_s { vec3_t pos; diff --git a/ref_vk/vk_rtx.c b/ref_vk/vk_rtx.c index 3f0e9ec1..c617a34b 100644 --- a/ref_vk/vk_rtx.c +++ b/ref_vk/vk_rtx.c @@ -201,14 +201,17 @@ void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t * } } -void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage img_dst, uint32_t w, uint32_t h ) +void VK_RaySceneEnd(const vk_ray_scene_render_args_t* args) { ASSERT(vk_core.rtx); + ASSERT(args->ubo.size == sizeof(float) * 16 * 2); // ubo should contain two matrices + const VkCommandBuffer cmdbuf = args->cmdbuf; // Upload all blas instances references to GPU mem { VkAccelerationStructureInstanceKHR *inst = g_rtx.tlas_geom_buffer.mapped; for (int i = 0; i < g_rtx_scene.num_accels; ++i) { + ASSERT(g_rtx.accels[i] != VK_NULL_HANDLE); inst[i] = (VkAccelerationStructureInstanceKHR){ .transform = (VkTransformMatrixKHR){1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}, .instanceCustomIndex = 0, @@ -232,9 +235,9 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i .size = VK_WHOLE_SIZE, }}; vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); } // 2. Create TLAS @@ -255,7 +258,7 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = {g_rtx_scene.num_accels}; const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = { - .primitiveCount = g_rtx_scene.num_accels, + .primitiveCount = g_rtx_scene.num_accels, }; const VkAccelerationStructureBuildRangeInfoKHR *tl_build_ranges[] = {&tl_build_range}; g_rtx.tlas = createAndBuildAccelerationStructure(cmdbuf, @@ -266,16 +269,14 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i { const VkDescriptorImageInfo dii = { .sampler = VK_NULL_HANDLE, - .imageView = img_dst_view, + .imageView = args->dst.image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, }; - /* const VkDescriptorBufferInfo dbi = { - .buffer = g_rtx.tri_buf.buffer, - .offset = 0, - .range = VK_WHOLE_SIZE, + .buffer = args->ubo.buffer, + .offset = args->ubo.offset, + .range = args->ubo.size, }; - */ const VkWriteDescriptorSetAccelerationStructureKHR wdsas = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, .accelerationStructureCount = 1, @@ -291,16 +292,15 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i .dstArrayElement = 0, .pImageInfo = &dii, }, - /* { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .dstSet = g_rtx.desc_set, .dstBinding = 2, .dstArrayElement = 0, .pBufferInfo = &dbi, - },*/ + }, { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .descriptorCount = 1, @@ -328,7 +328,7 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i }}; VkImageMemoryBarrier image_barrier[] = { { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = img_dst, + .image = args->dst.image, .srcAccessMask = 0, .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, @@ -348,7 +348,7 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, g_rtx.pipeline); //vkCmdPushConstants(cmdbuf, g_rtx.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(matrix4x4), mvp); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, g_rtx.pipeline_layout, 0, 1, &g_rtx.desc_set, 0, NULL); - vkCmdDispatch(cmdbuf, (w+WG_W-1)/WG_W, (h+WG_H-1)/WG_H, 1); + vkCmdDispatch(cmdbuf, (args->dst.width+WG_W-1)/WG_W, (args->dst.height+WG_H-1)/WG_H, 1); } static void createLayouts( void ) { @@ -362,10 +362,11 @@ static void createLayouts( void ) { .descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - /* }, { */ /* .binding = 2, */ - /* .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, */ - /* .descriptorCount = 1, */ - /* .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, */ + }, { + .binding = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }, }; @@ -391,7 +392,7 @@ static void createLayouts( void ) { { VkDescriptorPoolSize pools[] = { {.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .descriptorCount = 1}, - //{.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1}, + {.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .descriptorCount = 1}, {.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, .descriptorCount = 1}, }; diff --git a/ref_vk/vk_rtx.h b/ref_vk/vk_rtx.h index b928df68..081bfea7 100644 --- a/ref_vk/vk_rtx.h +++ b/ref_vk/vk_rtx.h @@ -18,7 +18,24 @@ vk_ray_model_handle_t VK_RayModelCreate( const vk_ray_model_create_t *args ); void VK_RaySceneBegin( void ); void VK_RayScenePushModel(VkCommandBuffer cmdbuf, const vk_ray_model_create_t* model); // vk_ray_model_handle_t model ); -void VK_RaySceneEnd(VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage img_dst, uint32_t w, uint32_t h); + +typedef struct { + VkCommandBuffer cmdbuf; + + struct { + VkImageView image_view; + VkImage image; + uint32_t width, height; + } dst; + + // inv_view/proj matrices + struct { + VkBuffer buffer; + uint32_t offset; + uint32_t size; + } ubo; +} vk_ray_scene_render_args_t; +void VK_RaySceneEnd(const vk_ray_scene_render_args_t* args); qboolean VK_RayInit( void ); void VK_RayShutdown( void ); diff --git a/ref_vk/vk_scene.c b/ref_vk/vk_scene.c index 8516fa44..930d5351 100644 --- a/ref_vk/vk_scene.c +++ b/ref_vk/vk_scene.c @@ -548,6 +548,7 @@ static void setupCamera( const ref_viewpass_t *rvp, matrix4x4 mvp ) {0, 0, .5, 1} }; Matrix4x4_Concat( mvp, vk_proj_fixup, g_camera.worldviewProjectionMatrix); + Matrix4x4_Concat( g_camera.projectionMatrixVk, vk_proj_fixup, g_camera.projectionMatrix); } } @@ -632,6 +633,9 @@ void VK_SceneRender( const ref_viewpass_t *rvp ) setupCamera( rvp, mvp ); + VK_RenderStateSetProjectionMatrix(g_camera.projectionMatrixVk); + VK_RenderStateSetViewMatrix(g_camera.modelviewMatrix); + VK_RenderDebugLabelBegin( "opaque" ); // Draw view model