diff --git a/ref_vk/vk_buffer.c b/ref_vk/vk_buffer.c index 4c0158ae..62ad0962 100644 --- a/ref_vk/vk_buffer.c +++ b/ref_vk/vk_buffer.c @@ -16,11 +16,12 @@ qboolean createBuffer(vk_buffer_t *buf, uint32_t size, VkBufferUsageFlags usage, vkGetBufferMemoryRequirements(vk_core.device, buf->buffer, &memreq); gEngine.Con_Reportf("memreq: memoryTypeBits=0x%x alignment=%zu size=%zu\n", memreq.memoryTypeBits, memreq.alignment, memreq.size); - buf->device_memory = allocateDeviceMemory(memreq, flags); + buf->device_memory = allocateDeviceMemory(memreq, flags, usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT : 0); XVK_CHECK(vkBindBufferMemory(vk_core.device, buf->buffer, buf->device_memory.device_memory, buf->device_memory.offset)); // FIXME when there are many allocation per VkDeviceMemory, fix this - XVK_CHECK(vkMapMemory(vk_core.device, buf->device_memory.device_memory, 0, bci.size, 0, &buf->mapped)); + if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & flags) + XVK_CHECK(vkMapMemory(vk_core.device, buf->device_memory.device_memory, 0, bci.size, 0, &buf->mapped)); buf->size = size; @@ -37,7 +38,9 @@ void destroyBuffer(vk_buffer_t *buf) { if (buf->device_memory.device_memory) { - vkUnmapMemory(vk_core.device, buf->device_memory.device_memory); + if (buf->mapped) + vkUnmapMemory(vk_core.device, buf->device_memory.device_memory); + freeDeviceMemory(&buf->device_memory); buf->device_memory.device_memory = VK_NULL_HANDLE; buf->device_memory.offset = 0; diff --git a/ref_vk/vk_core.c b/ref_vk/vk_core.c index ed648138..ee79fd6b 100644 --- a/ref_vk/vk_core.c +++ b/ref_vk/vk_core.c @@ -41,6 +41,7 @@ static PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; INSTANCE_FUNCS(X) INSTANCE_DEBUG_FUNCS(X) DEVICE_FUNCS(X) + DEVICE_FUNCS_RTX(X) #undef X static dllfunc_t nullinst_funcs[] = { @@ -802,11 +803,18 @@ static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFla return UINT32_MAX; } -device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props) { +device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags) { // TODO coalesce allocations, ... device_memory_t ret = {0}; + + const VkMemoryAllocateFlagsInfo mafi = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = flags, + }; + VkMemoryAllocateInfo mai = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = flags ? &mafi : NULL, .allocationSize = req.size, .memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, props), }; diff --git a/ref_vk/vk_core.h b/ref_vk/vk_core.h index a03d9be0..3b690d35 100644 --- a/ref_vk/vk_core.h +++ b/ref_vk/vk_core.h @@ -24,7 +24,7 @@ typedef struct device_memory_s uint32_t offset; } device_memory_t; -device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props); +device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags); void freeDeviceMemory(device_memory_t *mem); typedef struct vk_buffer_s diff --git a/ref_vk/vk_framectl.c b/ref_vk/vk_framectl.c index 3f59a178..0d4441fe 100644 --- a/ref_vk/vk_framectl.c +++ b/ref_vk/vk_framectl.c @@ -77,7 +77,7 @@ static void createDepthImage(int w, int h) { vk_frame.depth.image = createImage(w, h, vk_frame.depth.format, tiling, usage); vkGetImageMemoryRequirements(vk_core.device, vk_frame.depth.image, &memreq); - vk_frame.depth.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + vk_frame.depth.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0); XVK_CHECK(vkBindImageMemory(vk_core.device, vk_frame.depth.image, vk_frame.depth.device_memory.device_memory, 0)); { diff --git a/ref_vk/vk_render.c b/ref_vk/vk_render.c index 4abd1c16..07995040 100644 --- a/ref_vk/vk_render.c +++ b/ref_vk/vk_render.c @@ -249,7 +249,7 @@ qboolean VK_RenderInit( void ) // TODO device memory and friends (e.g. handle mobile memory ...) - if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (vk_core.rtx ? VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : 0), VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) return false; if (!createBuffer(&g_render.uniform_buffer, g_render.uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) @@ -411,8 +411,6 @@ typedef struct { render_draw_t draw; uint32_t ubo_offset; //char debug_name[MAX_DEBUG_NAME_LENGTH]; - - vk_ray_model_handle_t ray_model; } draw_command_t; static struct { @@ -526,24 +524,6 @@ void VK_RenderScheduleDraw( const render_draw_t *draw ) draw_command = g_render_state.draw_commands + (g_render_state.num_draw_commands++); draw_command->draw = *draw; draw_command->ubo_offset = g_render.uniform_unit_size * ubo_index; - draw_command->ray_model = InvalidRayModel; - - if (vk_core.rtx) - // TODO there's a more complex story with lifetimes and rebuilds && vertex_buffer->lifetime < LifetimeSingleFrame) - { - // TODO it would make sense to join logical models into a single ray model - // but here we've completely lost this info, as models are now just a stream - // of independent draws - const vk_ray_model_create_t ray_model_args = { - .element_count = draw->element_count, - .vertex_count = vertex_buffer->count, - .index_offset = index_buffer ? index_buffer->unit_size * (draw->index_offset + index_buffer->buffer_offset_in_units) : UINT32_MAX, - .vertex_offset = (draw->vertex_offset + vertex_buffer->buffer_offset_in_units) * vertex_buffer->unit_size, - .buffer = g_render.buffer.buffer, - }; - - draw_command->ray_model = VK_RayModelCreate( &ray_model_args ); - } } void VK_RenderEnd( VkCommandBuffer cmdbuf ) @@ -626,9 +606,24 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf ) VK_RaySceneBegin(); for (int i = 0; i < g_render_state.num_draw_commands; ++i) { const draw_command_t *const draw = g_render_state.draw_commands + i; - if (draw->ray_model) { - VK_RayScenePushModel( cmdbuf, draw->ray_model ); - } + const vk_buffer_alloc_t *vertex_buffer = getBufferFromHandle( draw->draw.vertex_buffer ); + const vk_buffer_alloc_t *index_buffer = draw->draw.index_buffer != InvalidHandle ? getBufferFromHandle( draw->draw.index_buffer ) : NULL; + const uint32_t vertex_offset = vertex_buffer->buffer_offset_in_units + draw->draw.vertex_offset; + + // TODO there's a more complex story with lifetimes and rebuilds && vertex_buffer->lifetime < LifetimeSingleFrame) + // TODO it would make sense to join logical models into a single ray model + // but here we've completely lost this info, as models are now just a stream + // of independent draws + + const vk_ray_model_create_t ray_model_args = { + .element_count = draw->draw.element_count, + .max_vertex = vertex_buffer->count, // TODO this is an upper bound for brushes at least, it can be lowered + .index_offset = index_buffer ? index_buffer->unit_size * (draw->draw.index_offset + index_buffer->buffer_offset_in_units) : UINT32_MAX, + .vertex_offset = (draw->draw.vertex_offset + vertex_buffer->buffer_offset_in_units) * vertex_buffer->unit_size, + .buffer = g_render.buffer.buffer, + }; + + VK_RayScenePushModel(cmdbuf, &ray_model_args); } VK_RaySceneEnd( cmdbuf ); } diff --git a/ref_vk/vk_rtx.c b/ref_vk/vk_rtx.c index b1e93ea1..106e2bf9 100644 --- a/ref_vk/vk_rtx.c +++ b/ref_vk/vk_rtx.c @@ -9,15 +9,17 @@ #define MAX_ACCELS 1024 #define MAX_SCRATCH_BUFFER (16*1024*1024) -#define MAX_ACCELS_BUFFER (16*1024*1024) +#define MAX_ACCELS_BUFFER (64*1024*1024) +/* typedef struct { //int lightmap, texture; //int render_mode; - uint32_t element_count, vertex_count; + uint32_t element_count; uint32_t index_offset, vertex_offset; VkBuffer buffer; } vk_ray_model_t; +*/ static struct { /* VkPipelineLayout pipeline_layout; */ @@ -68,11 +70,16 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons return false; } - vkGetAccelerationStructureBuildSizesKHR( + vkGetAccelerationStructureBuildSizesKHR( vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info, max_prim_counts, &build_size); - gEngine.Con_Reportf( - "AS build size: %d, scratch size: %d", build_size.accelerationStructureSize, build_size.buildScratchSize); + { + uint32_t max_prims = 0; + for (int i = 0; i < n_geoms; ++i) + max_prims += max_prim_counts[i]; + gEngine.Con_Reportf( + "AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize); + } if (MAX_SCRATCH_BUFFER - g_rtx_scene.scratch_offset < build_size.buildScratchSize) { gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n", @@ -93,6 +100,7 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons // TODO alignment? g_rtx_scene.buffer_offset += build_size.accelerationStructureSize; + g_rtx_scene.buffer_offset = (g_rtx_scene.buffer_offset + 255) & ~255; // Buffer must be aligned to 256 according to spec g_rtx_scene.num_accels++; build_info.dstAccelerationStructure = *handle; @@ -117,38 +125,46 @@ void VK_RaySceneBegin( void ) g_rtx_scene.num_accels = 0; } -static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle); +/* +static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle) +{ +} +*/ -void VK_RayScenePushModel( VkCommandBuffer cmdbuf, vk_ray_model_handle_t model_handle ) +void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t *model) // _handle_t model_handle ) { ASSERT(vk_core.rtx); - vk_ray_model_t *model = getModelByHandle(model_handle); - const VkDeviceAddress buffer_addr = getBufferDeviceAddress(model->buffer); - const VkAccelerationStructureGeometryKHR geom[] = { - { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, - .flags = VK_GEOMETRY_OPAQUE_BIT_KHR, - .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, - .geometry.triangles = - (VkAccelerationStructureGeometryTrianglesDataKHR){ - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, - .indexType = model->index_offset == UINT32_MAX ? VK_INDEX_TYPE_NONE_KHR : VK_INDEX_TYPE_UINT16, - .maxVertex = model->vertex_count, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .vertexStride = sizeof(vk_vertex_t), - .vertexData.deviceAddress = buffer_addr + model->vertex_offset, - .indexData.deviceAddress = buffer_addr + model->index_offset, - }, - }}; - const uint32_t max_prim_counts[ARRAYSIZE(geom)] = {model->vertex_count}; - const VkAccelerationStructureBuildRangeInfoKHR build_range_tri = { - .primitiveCount = model->element_count / 3, - }; - const VkAccelerationStructureBuildRangeInfoKHR *build_ranges[ARRAYSIZE(geom)] = {&build_range_tri}; + { + //vk_ray_model_t *model = getModelByHandle(model_handle); + const VkDeviceAddress buffer_addr = getBufferDeviceAddress(model->buffer); + const uint32_t prim_count = model->element_count / 3; + const VkAccelerationStructureGeometryKHR geom[] = { + { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .flags = VK_GEOMETRY_OPAQUE_BIT_KHR, + .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, + .geometry.triangles = + (VkAccelerationStructureGeometryTrianglesDataKHR){ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, + .indexType = model->index_offset == UINT32_MAX ? VK_INDEX_TYPE_NONE_KHR : VK_INDEX_TYPE_UINT16, + .maxVertex = model->max_vertex, + .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, + .vertexStride = sizeof(vk_vertex_t), + .vertexData.deviceAddress = buffer_addr + model->vertex_offset, + .indexData.deviceAddress = buffer_addr + model->index_offset, + }, + } }; - createAndBuildAccelerationStructure(cmdbuf, - geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); + const uint32_t max_prim_counts[ARRAYSIZE(geom)] = { prim_count }; + const VkAccelerationStructureBuildRangeInfoKHR build_range_tri = { + .primitiveCount = prim_count, + }; + const VkAccelerationStructureBuildRangeInfoKHR* build_ranges[ARRAYSIZE(geom)] = { &build_range_tri }; + + createAndBuildAccelerationStructure(cmdbuf, + geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); + } } void VK_RaySceneEnd( VkCommandBuffer cmdbuf ) @@ -190,5 +206,7 @@ void VK_RayShutdown( void ) ASSERT(vk_core.rtx); destroyBuffer(&g_rtx.scratch_buffer); destroyBuffer(&g_rtx.accels_buffer); + + // TODO dealloc all ASes } diff --git a/ref_vk/vk_rtx.h b/ref_vk/vk_rtx.h index ddd00988..9d2f1527 100644 --- a/ref_vk/vk_rtx.h +++ b/ref_vk/vk_rtx.h @@ -5,7 +5,8 @@ typedef struct { //int lightmap, texture; //int render_mode; - uint32_t element_count, vertex_count; + uint32_t max_vertex; + uint32_t element_count; uint32_t index_offset, vertex_offset; VkBuffer buffer; } vk_ray_model_create_t; @@ -16,7 +17,7 @@ enum { InvalidRayModel = -1 }; vk_ray_model_handle_t VK_RayModelCreate( const vk_ray_model_create_t *args ); void VK_RaySceneBegin( void ); -void VK_RayScenePushModel( VkCommandBuffer cmdbuf, vk_ray_model_handle_t model ); +void VK_RayScenePushModel(VkCommandBuffer cmdbuf, const vk_ray_model_create_t* model); // vk_ray_model_handle_t model ); void VK_RaySceneEnd( VkCommandBuffer cmdbuf ); qboolean VK_RayInit( void ); diff --git a/ref_vk/vk_textures.c b/ref_vk/vk_textures.c index 710bc318..02faa85b 100644 --- a/ref_vk/vk_textures.c +++ b/ref_vk/vk_textures.c @@ -345,7 +345,7 @@ static qboolean VK_UploadTexture(vk_texture_t *tex, rgbdata_t *pic) { VkMemoryRequirements memreq; vkGetImageMemoryRequirements(vk_core.device, tex->vk.image, &memreq); - tex->vk.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + tex->vk.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0); XVK_CHECK(vkBindImageMemory(vk_core.device, tex->vk.image, tex->vk.device_memory.device_memory, tex->vk.device_memory.offset)); }