mirror of https://github.com/w23/xash3d-fwgs
synced 2025-01-22 00:30:18 +01:00
cadf3dbdfc Enable updating rt_blas, and use that for updating brush water models
642 lines · 24 KiB · C
#include "vk_ray_accel.h"

#include "vk_core.h"
#include "vk_rtx.h"
#include "vk_ray_internal.h"
#include "r_speeds.h"
#include "vk_combuf.h"
#include "vk_staging.h"
#include "vk_math.h"
#include "vk_geometry.h"
#include "vk_render.h"

#include "xash3d_mathlib.h"

#ifndef ARRAYSIZE
#define ARRAYSIZE(p) (sizeof(p)/sizeof(p[0]))
#endif // #ifndef ARRAYSIZE

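// Bottom-level acceleration structure (BLAS) bookkeeping: one per model.
// blas_size and max_geoms record the capacity the AS was created with, so
// that later dynamic rebuilds can be validated against it (see RT_BlasBuild).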
typedef struct rt_blas_s {
	const char *debug_name;
	rt_blas_usage_e usage;

	VkAccelerationStructureKHR blas;

	int max_geoms;
	//uint32_t *max_prim_counts;
	int blas_size;
} rt_blas_t;

static struct {
	// Stores AS built data. Lifetime similar to render buffer:
	// - some portion lives for the entire map lifetime
	// - some portion lives only for a single frame (may have several frames in flight)
	// TODO: unify this with render buffer
	// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
	vk_buffer_t accels_buffer;
	struct alo_pool_s *accels_buffer_alloc;

	// Temp: lives only during a single frame (may have many in flight)
	// Used for building ASes;
	// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
	vk_buffer_t scratch_buffer;
	VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;

	// Temp-ish: used for making the TLAS; contains addresses of all used BLASes
	// Lifetime and nature of usage similar to scratch_buffer
	// TODO: unify them
	// Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY
	vk_buffer_t tlas_geom_buffer;
	VkDeviceAddress tlas_geom_buffer_addr;
	r_flipping_buffer_t tlas_geom_buffer_alloc;

	// TODO need several TLASes for N frames in flight
	VkAccelerationStructureKHR tlas;

	// Per-frame data that is accumulated between RayFrameBegin and End calls
	struct {
		uint32_t scratch_offset; // for building dynamic blases
	} frame;

	struct {
		int instances_count;
		int accels_built;
	} stats;
} g_accel;

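// Asks the driver how much AS storage and build scratch memory this geometry
// layout needs; max_prim_counts has one entry per geometry.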
static VkAccelerationStructureBuildSizesInfoKHR getAccelSizes(const VkAccelerationStructureBuildGeometryInfoKHR *build_info, const uint32_t *max_prim_counts) {
	VkAccelerationStructureBuildSizesInfoKHR build_size = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
	};

	vkGetAccelerationStructureBuildSizesKHR(
		vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, build_info, max_prim_counts, &build_size);

	return build_size;
}

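// Sub-allocates AS storage from the shared accels_buffer pool and creates a
// Vulkan handle inside it. Returns VK_NULL_HANDLE if the pool is exhausted.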
static VkAccelerationStructureKHR createAccel(const char *name, VkAccelerationStructureTypeKHR type, uint32_t size) {
	const alo_block_t block = aloPoolAllocate(g_accel.accels_buffer_alloc, size, /*TODO why? align=*/256);

	if (block.offset == ALO_ALLOC_FAILED) {
		gEngine.Con_Printf(S_ERROR "Failed to allocate %u bytes for blas \"%s\"\n", size, name);
		return VK_NULL_HANDLE;
	}

	const VkAccelerationStructureCreateInfoKHR asci = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
		.buffer = g_accel.accels_buffer.buffer,
		.offset = block.offset,
		.type = type,
		.size = size,
	};

	VkAccelerationStructureKHR accel = VK_NULL_HANDLE;
	XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, &accel));
	SET_DEBUG_NAME(accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, name);
	return accel;
}

static VkDeviceAddress getAccelAddress(VkAccelerationStructureKHR as) {
	VkAccelerationStructureDeviceAddressInfoKHR asdai = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
		.accelerationStructure = as,
	};
	return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai);
}

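// Records one AS build on the staging command buffer: a transfer barrier
// makes prior geometry uploads visible, scratch space is carved out of the
// per-frame scratch buffer, and the build itself is wrapped in a GPU timing
// scope. Returns false (without building) on scratch overflow.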
static qboolean buildAccel(VkBuffer geometry_buffer, VkAccelerationStructureBuildGeometryInfoKHR *build_info, uint32_t scratch_buffer_size, const VkAccelerationStructureBuildRangeInfoKHR *build_ranges) {
	// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
	vk_combuf_t *const combuf = R_VkStagingCommit();
	{
		const VkBufferMemoryBarrier bmb[] = { {
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
			.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, // FIXME
			.buffer = geometry_buffer,
			.offset = 0, // FIXME
			.size = VK_WHOLE_SIZE, // FIXME
		} };
		vkCmdPipelineBarrier(combuf->cmdbuf,
			VK_PIPELINE_STAGE_TRANSFER_BIT,
			//VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
			0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
	}

	//gEngine.Con_Reportf("scratch offset = %d, req=%d\n", g_accel.frame.scratch_offset, scratch_buffer_size);

	if (MAX_SCRATCH_BUFFER < g_accel.frame.scratch_offset + scratch_buffer_size) {
		gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
			MAX_SCRATCH_BUFFER - g_accel.frame.scratch_offset,
			scratch_buffer_size);
		return false;
	}

	build_info->scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset;

	//uint32_t scratch_offset_initial = g_accel.frame.scratch_offset;
	g_accel.frame.scratch_offset += scratch_buffer_size;
	g_accel.frame.scratch_offset = ALIGN_UP(g_accel.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment);

	//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);

	g_accel.stats.accels_built++;

	static int scope_id = -2;
	if (scope_id == -2)
		scope_id = R_VkGpuScope_Register("build_as");
	const int begin_index = R_VkCombufScopeBegin(combuf, scope_id);
	const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = build_ranges;
	vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, build_info, &p_build_ranges);
	R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);

	return true;
}

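// Common create-and/or-build path shared by BLAS and TLAS. Creation happens
// only while *p_accel is still VK_NULL_HANDLE; the build step is skipped when
// no command buffer or build ranges are given (used to just reserve the TLAS
// at map load, see RT_VkAccelNewMap).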
// TODO split this into smaller building blocks in a separate module
qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args) {
	ASSERT(args->geoms);
	ASSERT(args->n_geoms > 0);
	ASSERT(args->p_accel);

	const qboolean should_create = *args->p_accel == VK_NULL_HANDLE;

	VkAccelerationStructureBuildGeometryInfoKHR build_info = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
		.type = args->type,
		.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
		.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
		.geometryCount = args->n_geoms,
		.pGeometries = args->geoms,
		.srcAccelerationStructure = VK_NULL_HANDLE,
	};

	const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, args->max_prim_counts);

	if (should_create) {
		*args->p_accel = createAccel(args->debug_name, args->type, build_size.accelerationStructureSize);

		if (*args->p_accel == VK_NULL_HANDLE) // check the created handle, not the (already asserted) output pointer
			return false;

		if (args->out_accel_addr)
			*args->out_accel_addr = getAccelAddress(*args->p_accel);

		if (args->inout_size)
			*args->inout_size = build_size.accelerationStructureSize;

		// gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size);
	}

	// If there's not enough data for building, just create
	if (!combuf || !args->build_ranges)
		return true;

	if (args->inout_size)
		ASSERT(*args->inout_size >= build_size.accelerationStructureSize);

	build_info.dstAccelerationStructure = *args->p_accel;
	const VkBuffer geometry_buffer = R_GeometryBuffer_Get();
	return buildAccel(geometry_buffer, &build_info, build_size.buildScratchSize, args->build_ranges);
}

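// Creates the TLAS sized for MAX_INSTANCES and, when a command buffer is
// given, also builds it over the instance array at instances_addr. With
// combuf == NULL only the handle is created.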
static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
	const VkAccelerationStructureGeometryKHR tl_geom[] = {
		{
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
			//.flags = VK_GEOMETRY_OPAQUE_BIT,
			.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR,
			.geometry.instances =
				(VkAccelerationStructureGeometryInstancesDataKHR){
					.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,
					.data.deviceAddress = instances_addr,
					.arrayOfPointers = VK_FALSE,
				},
		},
	};
	const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_INSTANCES };
	const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
		.primitiveCount = g_ray_model_state.frame.instances_count,
	};
	const as_build_args_t asrgs = {
		.geoms = tl_geom,
		.max_prim_counts = tl_max_prim_counts,
		.build_ranges = !combuf ? NULL : &tl_build_range,
		.n_geoms = COUNTOF(tl_geom),
		.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
		// Can't really make the TLAS .dynamic = true: instance count changes are not allowed for updates
		.dynamic = false,
		.p_accel = &g_accel.tlas,
		.debug_name = "TLAS",
		.out_accel_addr = NULL,
		.inout_size = NULL,
	};
	if (!createOrUpdateAccelerationStructure(combuf, &asrgs)) {
		gEngine.Host_Error("Could not create/update TLAS\n");
		return;
	}
}

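// Per-frame TLAS preparation: writes one VkAccelerationStructureInstanceKHR
// per draw instance into the flipping instance buffer (mask, SBT record
// offset and opacity flags derived from the material mode), uploads the
// matching ModelHeader array via staging, issues a barrier so BLAS builds
// complete before the TLAS reads them, builds the TLAS, and returns it as a
// descriptor resource for the ray tracing passes.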
vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
	ASSERT(g_ray_model_state.frame.instances_count > 0);
	DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas");

	R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc );

	const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.instances_count, 1);
	ASSERT(instance_offset != ALO_ALLOC_FAILED);

	// Upload all blas instances references to GPU mem
	{
		const vk_staging_region_t headers_lock = R_VkStagingLockForBuffer((vk_staging_buffer_args_t){
			.buffer = g_ray_model_state.model_headers_buffer.buffer,
			.offset = 0,
			.size = g_ray_model_state.frame.instances_count * sizeof(struct ModelHeader),
			.alignment = 16,
		});

		ASSERT(headers_lock.ptr);

		VkAccelerationStructureInstanceKHR* inst = ((VkAccelerationStructureInstanceKHR*)g_accel.tlas_geom_buffer.mapped) + instance_offset;
		for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) {
			const rt_draw_instance_t* const instance = g_ray_model_state.frame.instances + i;
			ASSERT(instance->blas_addr != 0);
			inst[i] = (VkAccelerationStructureInstanceKHR){
				.instanceCustomIndex = instance->kusochki_offset,
				.instanceShaderBindingTableRecordOffset = 0,
				.accelerationStructureReference = instance->blas_addr,
			};
			switch (instance->material_mode) {
				case MATERIAL_MODE_OPAQUE:
					inst[i].mask = GEOMETRY_BIT_OPAQUE;
					inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_REGULAR;
					inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR;
					break;
				case MATERIAL_MODE_OPAQUE_ALPHA_TEST:
					inst[i].mask = GEOMETRY_BIT_ALPHA_TEST;
					inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_ALPHA_TEST;
					inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR;
					break;
				case MATERIAL_MODE_TRANSLUCENT:
					inst[i].mask = GEOMETRY_BIT_REFRACTIVE;
					inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_REGULAR;
					inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR;
					break;
				case MATERIAL_MODE_BLEND_ADD:
				case MATERIAL_MODE_BLEND_MIX:
				case MATERIAL_MODE_BLEND_GLOW:
					inst[i].mask = GEOMETRY_BIT_BLEND;
					inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_ADDITIVE;
					inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR;
					break;
				default:
					gEngine.Host_Error("Unexpected material mode %d\n", instance->material_mode);
					break;
			}
			memcpy(&inst[i].transform, instance->transform_row, sizeof(VkTransformMatrixKHR));

			struct ModelHeader *const header = ((struct ModelHeader*)headers_lock.ptr) + i;
			header->mode = instance->material_mode;
			Vector4Copy(instance->color, header->color);
			Matrix4x4_ToArrayFloatGL(instance->prev_transform_row, (float*)header->prev_transform);
		}

		R_VkStagingUnlock(headers_lock.handle);
	}

	g_accel.stats.instances_count = g_ray_model_state.frame.instances_count;

	// Barrier for building all BLASes
	// BLAS building is now in cmdbuf, need to synchronize with results
	{
		VkBufferMemoryBarrier bmb[] = {{
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, // | VK_ACCESS_TRANSFER_WRITE_BIT,
			.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR,
			.buffer = g_accel.accels_buffer.buffer,
			.offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR), // FIXME suspicious: offset/size are in instance-array units, but this barrier is on accels_buffer
			.size = g_ray_model_state.frame.instances_count * sizeof(VkAccelerationStructureInstanceKHR),
		}};
		vkCmdPipelineBarrier(combuf->cmdbuf,
			VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL);
	}

	// 2. Build TLAS
	createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
	DEBUG_END(combuf->cmdbuf);

	// TODO return vk_resource_t with callback to all this "do the preparation and barriers" crap, instead of doing it here
	{
		const VkBufferMemoryBarrier bmb[] = { {
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
			.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
			.buffer = g_accel.accels_buffer.buffer,
			.offset = 0,
			.size = VK_WHOLE_SIZE,
		} };
		vkCmdPipelineBarrier(combuf->cmdbuf,
			VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
			0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
	}

	return (vk_resource_t){
		.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
		.value = (vk_descriptor_value_t){
			.accel = (VkWriteDescriptorSetAccelerationStructureKHR) {
				.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
				.accelerationStructureCount = 1,
				.pAccelerationStructures = &g_accel.tlas,
				.pNext = NULL,
			},
		},
	};
}

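// One-time init: creates the accels, scratch and instance buffers and
// registers the r_speeds metrics. tlas_geom_buffer is double-sized so that
// two frames in flight each get one half via the flipping allocator.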
qboolean RT_VkAccelInit(void) {
	if (!VK_BufferCreate("ray accels_buffer", &g_accel.accels_buffer, MAX_ACCELS_BUFFER,
		VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
		VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
		))
	{
		return false;
	}
	g_accel.accels_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.accels_buffer.buffer);

	if (!VK_BufferCreate("ray scratch_buffer", &g_accel.scratch_buffer, MAX_SCRATCH_BUFFER,
		VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
		VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
		)) {
		return false;
	}
	g_accel.scratch_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.scratch_buffer.buffer);

	// TODO this doesn't really need to be host visible, use staging
	if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_INSTANCES * 2,
		VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
		VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
		VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
		// FIXME complain, handle
		return false;
	}
	g_accel.tlas_geom_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.tlas_geom_buffer.buffer);
	R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_INSTANCES * 2);

	g_accel.accels_buffer_alloc = aloPoolCreate(MAX_ACCELS_BUFFER, MAX_INSTANCES, /* why */ 256);

	R_SpeedsRegisterMetric(&g_accel.stats.instances_count, "accels_instances_count", kSpeedsMetricCount);
	R_SpeedsRegisterMetric(&g_accel.stats.accels_built, "accels_built", kSpeedsMetricCount);

	return true;
}

void RT_VkAccelShutdown(void) {
	if (g_accel.tlas != VK_NULL_HANDLE)
		vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas, NULL);

	VK_BufferDestroy(&g_accel.scratch_buffer);
	VK_BufferDestroy(&g_accel.accels_buffer);
	VK_BufferDestroy(&g_accel.tlas_geom_buffer);
	if (g_accel.accels_buffer_alloc)
		aloPoolDestroy(g_accel.accels_buffer_alloc);
}

void RT_VkAccelNewMap(void) {
	const int expected_accels = 512; // TODO actually get this from playing the game
	const int accels_alignment = 256; // TODO where does this come from?
	ASSERT(vk_core.rtx);

	g_accel.frame.scratch_offset = 0;

	if (g_accel.accels_buffer_alloc)
		aloPoolDestroy(g_accel.accels_buffer_alloc);
	g_accel.accels_buffer_alloc = aloPoolCreate(MAX_ACCELS_BUFFER, expected_accels, accels_alignment);

	// Recreate the TLAS here rather than in init to make sure that its memory is preserved: map init clears all memory regions.
	{
		if (g_accel.tlas != VK_NULL_HANDLE) {
			vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas, NULL);
			g_accel.tlas = VK_NULL_HANDLE;
		}

		createTlas(VK_NULL_HANDLE, g_accel.tlas_geom_buffer_addr);
	}
}

void RT_VkAccelFrameBegin(void) {
	g_accel.frame.scratch_offset = 0;
}

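// Public BLAS API. A typical lifecycle might look like this (a sketch; the
// real call sites live in other renderer modules):
//
//   rt_blas_t *blas = RT_BlasCreate("brush water", kBlasBuildDynamicUpdate);
//   RT_BlasBuild(blas, geoms, geoms_count); // first call builds, later calls refit
//   const VkDeviceAddress addr = RT_BlasGetDeviceAddress(blas); // referenced by TLAS instances
//   // ...
//   RT_BlasDestroy(blas);
//
// kBlasBuildDynamicFast BLASes would typically go through RT_BlasPreallocate
// first, so that later rebuilds of varying size fit the reserved storage.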
struct rt_blas_s* RT_BlasCreate(const char *name, rt_blas_usage_e usage) {
	rt_blas_t *blas = Mem_Calloc(vk_core.pool, sizeof(*blas));

	blas->debug_name = name;
	blas->usage = usage;
	blas->blas_size = -1;

	return blas;
}

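// Sizes and creates a kBlasBuildDynamicFast BLAS for the worst-case geometry
// counts, so per-frame rebuilds never need reallocation. Only sizing matters
// here: vertex/index device addresses stay zero, and build_ranges are filled
// in but not consumed by the size query.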
qboolean RT_BlasPreallocate(struct rt_blas_s* blas, rt_blas_preallocate_t args) {
	ASSERT(!blas->blas);
	ASSERT(blas->usage == kBlasBuildDynamicFast);

	// TODO allocate these from static pool
	VkAccelerationStructureGeometryKHR *const as_geoms = Mem_Calloc(vk_core.pool, args.max_geometries * sizeof(*as_geoms));
	uint32_t *const max_prim_counts = Mem_Malloc(vk_core.pool, args.max_geometries * sizeof(*max_prim_counts));
	VkAccelerationStructureBuildRangeInfoKHR *const build_ranges = Mem_Calloc(vk_core.pool, args.max_geometries * sizeof(*build_ranges));

	for (int i = 0; i < args.max_geometries; ++i) {
		max_prim_counts[i] = args.max_prims_per_geometry;
		as_geoms[i] = (VkAccelerationStructureGeometryKHR)
			{
				.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
				.flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. incoming mode might have transparency eventually (and also dynamically)
				.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
				.geometry.triangles =
					(VkAccelerationStructureGeometryTrianglesDataKHR){
						.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
						.indexType = VK_INDEX_TYPE_UINT16,
						.maxVertex = args.max_vertex_per_geometry,
						.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
						.vertexStride = sizeof(vk_vertex_t),
						.vertexData.deviceAddress = 0,
						.indexData.deviceAddress = 0,
					},
			};

		build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) {
			.primitiveCount = args.max_prims_per_geometry,
			.primitiveOffset = 0,
			.firstVertex = 0,
		};
	}

	VkAccelerationStructureBuildGeometryInfoKHR build_info = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
		.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
		.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
		.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR,
		.geometryCount = args.max_geometries,
		.srcAccelerationStructure = VK_NULL_HANDLE,
		.pGeometries = as_geoms,
	};

	const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, max_prim_counts);
	gEngine.Con_Reportf("geoms=%d max_prims=%d max_vertex=%d => blas=%dKiB\n",
		args.max_geometries, args.max_prims_per_geometry, args.max_vertex_per_geometry, (int)build_size.accelerationStructureSize / 1024);

	qboolean retval = false;

	blas->blas = createAccel(blas->debug_name, VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, build_size.accelerationStructureSize);
	if (!blas->blas) {
		gEngine.Con_Printf(S_ERROR "Couldn't preallocate blas %s\n", blas->debug_name);
		goto finalize;
	}

	retval = true;

	blas->blas_size = build_size.accelerationStructureSize;
	blas->max_geoms = build_info.geometryCount;

finalize:
	Mem_Free(as_geoms);
	Mem_Free(max_prim_counts);
	Mem_Free(build_ranges);
	return retval;
}

void RT_BlasDestroy(struct rt_blas_s* blas) {
	if (!blas)
		return;

	/* if (blas->max_prims) */
	/* 	Mem_Free(blas->max_prims); */

	if (blas->blas)
		vkDestroyAccelerationStructureKHR(vk_core.device, blas->blas, NULL);

	Mem_Free(blas);
}

VkDeviceAddress RT_BlasGetDeviceAddress(struct rt_blas_s *blas) {
	return getAccelAddress(blas->blas);
}

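// Builds a BLAS from render geometries, reading vertex/index data in place
// from the shared geometry buffer. Static BLASes may be built only once;
// kBlasBuildDynamicUpdate refits in place after the first build; a
// preallocated fast-dynamic BLAS fails loudly if the new build no longer
// fits its reserved size.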
qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count) {
	if (!blas || !geoms_count)
		return false;

	VkAccelerationStructureBuildGeometryInfoKHR build_info = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
		.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
		.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
		.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
		.geometryCount = geoms_count,
		.srcAccelerationStructure = VK_NULL_HANDLE,
	};

	qboolean is_update = false;

	switch (blas->usage) {
		case kBlasBuildStatic:
			ASSERT(!blas->blas);
			break;
		case kBlasBuildDynamicUpdate:
			if (blas->blas) {
				build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
				build_info.srcAccelerationStructure = blas->blas;
				is_update = true;
			}
			build_info.flags |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR;
			break;
		case kBlasBuildDynamicFast:
			build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR;
			break;
	}

	const VkBuffer geometry_buffer = R_GeometryBuffer_Get();
	const VkDeviceAddress buffer_addr = R_VkBufferGetDeviceAddress(geometry_buffer);

	// TODO allocate these from static pool
	VkAccelerationStructureGeometryKHR *const as_geoms = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*as_geoms));
	uint32_t *const max_prim_counts = Mem_Malloc(vk_core.pool, geoms_count * sizeof(*max_prim_counts));
	VkAccelerationStructureBuildRangeInfoKHR *const build_ranges = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*build_ranges));

	for (int i = 0; i < geoms_count; ++i) {
		const vk_render_geometry_t *mg = geoms + i;
		const uint32_t prim_count = mg->element_count / 3;

		max_prim_counts[i] = prim_count;
		as_geoms[i] = (VkAccelerationStructureGeometryKHR)
			{
				.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
				.flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. incoming mode might have transparency eventually (and also dynamically)
				.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
				.geometry.triangles =
					(VkAccelerationStructureGeometryTrianglesDataKHR){
						.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
						.indexType = VK_INDEX_TYPE_UINT16,
						.maxVertex = mg->max_vertex,
						.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
						.vertexStride = sizeof(vk_vertex_t),
						.vertexData.deviceAddress = buffer_addr,
						.indexData.deviceAddress = buffer_addr,
					},
			};

		build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) {
			.primitiveCount = prim_count,
			.primitiveOffset = mg->index_offset * sizeof(uint16_t),
			.firstVertex = mg->vertex_offset,
		};
	}

	build_info.pGeometries = as_geoms;

	const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, max_prim_counts);

	qboolean retval = false;

	// allocate blas
	if (!blas->blas) {
		blas->blas = createAccel(blas->debug_name, VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, build_size.accelerationStructureSize);
		if (!blas->blas) {
			gEngine.Con_Printf(S_ERROR "Couldn't create vk accel\n");
			goto finalize;
		}

		blas->blas_size = build_size.accelerationStructureSize;
		blas->max_geoms = build_info.geometryCount;
	} else {
		if (blas->blas_size < build_size.accelerationStructureSize) {
			gEngine.Con_Printf(S_ERROR "Fast dynamic BLAS %s size exceeded (need %dKiB, have %dKiB, geoms = %d)\n", blas->debug_name,
				(int)build_size.accelerationStructureSize / 1024,
				blas->blas_size / 1024,
				geoms_count
			);
			goto finalize;
		}
	}

	// Build
	build_info.dstAccelerationStructure = blas->blas;
	if (!buildAccel(geometry_buffer, &build_info, is_update ? build_size.updateScratchSize : build_size.buildScratchSize, build_ranges)) {
		gEngine.Con_Printf(S_ERROR "Couldn't build BLAS %s\n", blas->debug_name);
		goto finalize;
	}

	retval = true;

finalize:
	Mem_Free(as_geoms);
	Mem_Free(max_prim_counts);
	Mem_Free(build_ranges);
	return retval;
}