rt: start refactoring blas/tlas mgmt code

Draft the new accel/blas apis. Consolidate everything accel-related into
vk_ray_accel.c. Start splitting into more atomic functions. Prepare for
blas-model+kusochki split. etc etc.

The new code isn't really used yet.
This commit is contained in:
Ivan Avdeev 2023-05-18 11:59:14 -07:00
parent 4af9f65cd0
commit b65f84793a
7 changed files with 414 additions and 129 deletions

View File

@ -168,31 +168,8 @@ Exposed ops:
- build vs update
- Add to TLAS w/ color/xform/mmode/...
// just creates BLAS, doesn't alloc anything
rt_blas_t RT_BlasAlloc(int max_geometries);
typedef enum {
kBlasBuildStatic, // builds slow for fast trace
kBlasBuildDynamicUpdate, // builds if not built, updates if built
kBlasBuildDynamicFast, // builds fast from scratch (no correlation with previous frame guaranteed, e.g. triapi)
} rt_blas_build_mode_e;
// 1. Schedules BLAS build (allocates geoms+ranges from a temp pool, etc).
// 2. Allocates kusochki (if not) and fills them with geom and initial material data
void RT_BlasBuild(rt_blas_t *blas, const vk_render_geometry_t *geoms[], int geoms_count, rt_blas_build_mode_e mode);
// Update animated kusochki
void RT_BlasUpdateMaterialsSubset(rt_blas_t *blas...);
// Clone kusochki with different base_color texture (sprites)
uint32_t RT_BlasOverrideMaterial(rt_blas_t *blas, int texture);
struct rt_blas_s;
typedef struct {
const struct rt_blas_s* model;
int material_mode;
matrix3x4 transform, prev_transform;
vec4_t color;
uint32_t material_override;
} rt_frame_add_model_args_t;
void RT_FrameAddBlasInstance( rt_frame_add_model_args_t args );
- geometry_buffer -- vtx+idx static + multi-frame dynamic + single-frame dynamic
- kusochki_buffer -- kusok[] static + dynamic + clone_dynamic
- accel_buffer -- static + multiframe dynamic + single-frame dynamic
- scratch_buffer - single-frame dynamic
- model_buffer - single-frame dynamic

View File

@ -7,22 +7,97 @@
#include "vk_combuf.h"
#include "vk_staging.h"
#include "vk_math.h"
#include "vk_geometry.h"
#include "vk_render.h"
#include "xash3d_mathlib.h"
#define MAX_SCRATCH_BUFFER (32*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
#ifndef ARRAYSIZE
#define ARRAYSIZE(p) (sizeof(p)/sizeof(p[0]))
#endif // #ifndef ARRAYSIZE
struct rt_vk_ray_accel_s g_accel = {0};
// Bottom-level acceleration structure (BLAS) object handed out by RT_BlasCreate().
typedef struct rt_blas_s {
// Usage mode given to RT_BlasCreate(); RT_BlasBuild() switches on it to pick build mode and flags.
rt_blas_usage_e usage;
// Vulkan acceleration structure handle; created by RT_BlasBuild() when it is not set yet.
VkAccelerationStructureKHR blas;
// Device address of `blas`, queried right after the handle is created.
VkDeviceAddress blas_addr;
// Geometry count the acceleration structure was created with.
int max_geoms;
//uint32_t *max_prim_counts;
// Acceleration structure size in bytes; RT_BlasCreate() stores -1 here until a build fills it in.
int blas_size;
} rt_blas_t;
static struct {
// Stores AS built data. Lifetime similar to render buffer:
// - some portion lives for entire map lifetime
// - some portion lives only for a single frame (may have several frames in flight)
// TODO: unify this with render buffer
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t accels_buffer;
struct alo_pool_s *accels_buffer_alloc;
// Temp: lives only during a single frame (may have many in flight)
// Used for building ASes;
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t scratch_buffer;
VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;
// Temp-ish: used for making TLAS, contains addresses of all used BLASes
// Lifetime and nature of usage similar to scratch_buffer
// TODO: unify them
// Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY
vk_buffer_t tlas_geom_buffer;
VkDeviceAddress tlas_geom_buffer_addr;
r_flipping_buffer_t tlas_geom_buffer_alloc;
// TODO need several TLASes for N frames in flight
VkAccelerationStructureKHR tlas;
// Per-frame data that is accumulated between RayFrameBegin and End calls
struct {
uint32_t scratch_offset; // for building dynamic blases
} frame;
struct {
int instances_count;
int accels_built;
} stats;
} g_accel_;
} g_accel;
static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
static VkAccelerationStructureBuildSizesInfoKHR getAccelSizes(const VkAccelerationStructureBuildGeometryInfoKHR *build_info, const uint32_t *max_prim_counts) {
VkAccelerationStructureBuildSizesInfoKHR build_size = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
};
vkGetAccelerationStructureBuildSizesKHR(
vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, build_info, max_prim_counts, &build_size);
return build_size;
}
// Reserves `size` bytes in the accels buffer pool and creates an acceleration
// structure of the requested type over that range.
// `name` appears in the failure log and is attached as the Vulkan debug name.
// Returns VK_NULL_HANDLE when the pool allocation fails.
static VkAccelerationStructureKHR createAccel(const char *name, VkAccelerationStructureTypeKHR type, uint32_t size) {
	VkAccelerationStructureKHR accel = VK_NULL_HANDLE;
	const alo_block_t mem = aloPoolAllocate(g_accel.accels_buffer_alloc, size, /*TODO why? align=*/256);

	if (mem.offset != ALO_ALLOC_FAILED) {
		const VkAccelerationStructureCreateInfoKHR asci = {
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
			.buffer = g_accel.accels_buffer.buffer,
			.offset = mem.offset,
			.type = type,
			.size = size,
		};

		XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, &accel));
		SET_DEBUG_NAME(accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, name);
	} else {
		gEngine.Con_Printf(S_ERROR "Failed to allocated %u bytes for blas \"%s\"\n", size, name);
	}

	return accel;
}
static VkDeviceAddress getAccelAddress(VkAccelerationStructureKHR as) {
VkAccelerationStructureDeviceAddressInfoKHR asdai = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
.accelerationStructure = as,
@ -30,6 +105,57 @@ static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai);
}
// Records the commands that build a single acceleration structure.
// geometry_buffer: buffer the pre-build memory barrier is applied to (whole range).
// build_info: build description; its scratchData.deviceAddress is filled in here.
// build_size: sizes for this build; only buildScratchSize is read.
// build_ranges: range infos handed to vkCmdBuildAccelerationStructuresKHR().
// Returns false (after logging) when the per-frame scratch space cannot fit this build;
// in that case the barrier has already been recorded but the build command has not.
static qboolean buildAccel(VkBuffer geometry_buffer, VkAccelerationStructureBuildGeometryInfoKHR *build_info, const VkAccelerationStructureBuildSizesInfoKHR *build_size, const VkAccelerationStructureBuildRangeInfoKHR *build_ranges) {
// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
vk_combuf_t *const combuf = R_VkStagingCommit();
{
// Make transfer writes to the geometry buffer visible to acceleration structure reads and shader reads.
const VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
//.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, // FIXME
.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, // FIXME
.buffer = geometry_buffer,
.offset = 0, // FIXME
.size = VK_WHOLE_SIZE, // FIXME
} };
vkCmdPipelineBarrier(combuf->cmdbuf,
VK_PIPELINE_STAGE_TRANSFER_BIT,
//VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
// build blas
// NOTE(review): buildScratchSize is a wider Vulkan size type narrowed to uint32_t here — confirm that is intended.
const uint32_t scratch_buffer_size = build_size->buildScratchSize; // TODO vs build_size.updateScratchSize
// Refuse the build if it does not fit into what is left of the scratch buffer for this frame.
if (MAX_SCRATCH_BUFFER < g_accel.frame.scratch_offset + scratch_buffer_size) {
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
MAX_SCRATCH_BUFFER - g_accel.frame.scratch_offset,
scratch_buffer_size);
return false;
}
// Bump-allocate scratch space: this build starts at the current offset, then the offset
// advances past it and is rounded up to the device's required scratch alignment.
build_info->scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset;
//uint32_t scratch_offset_initial = g_accel.frame.scratch_offset;
g_accel.frame.scratch_offset += scratch_buffer_size;
g_accel.frame.scratch_offset = ALIGN_UP(g_accel.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment);
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
g_accel.stats.accels_built++;
// The GPU scope is registered once; -2 marks "not registered yet".
static int scope_id = -2;
if (scope_id == -2)
scope_id = R_VkGpuScope_Register("build_as");
const int begin_index = R_VkCombufScopeBegin(combuf, scope_id);
// The build call takes an array of pointers to range arrays; there is exactly one build here.
const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = build_ranges;
vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, build_info, &p_build_ranges);
R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
return true;
}
// TODO split this into smaller building blocks in a separate module
qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args, vk_ray_model_t *model) {
qboolean should_create = *args->p_accel == VK_NULL_HANDLE;
@ -82,28 +208,13 @@ qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build
}
if (should_create) {
const uint32_t as_size = build_size.accelerationStructureSize;
const alo_block_t block = aloPoolAllocate(g_accel.accels_buffer_alloc, as_size, /*TODO why? align=*/256);
const uint32_t buffer_offset = block.offset;
const VkAccelerationStructureCreateInfoKHR asci = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
.buffer = g_accel.accels_buffer.buffer,
.offset = buffer_offset,
.type = args->type,
.size = as_size,
};
*args->p_accel = createAccel(args->debug_name, args->type, build_size.accelerationStructureSize);
if (buffer_offset == ALO_ALLOC_FAILED) {
gEngine.Con_Printf(S_ERROR "Failed to allocated %u bytes for accel buffer\n", (uint32_t)asci.size);
if (!args->p_accel)
return false;
}
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->p_accel));
SET_DEBUG_NAME(*args->p_accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, args->debug_name);
if (model) {
model->size = asci.size;
model->debug.as_offset = buffer_offset;
model->size = build_size.accelerationStructureSize;
}
// gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size);
@ -125,7 +236,7 @@ qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
g_accel_.stats.accels_built++;
g_accel.stats.accels_built++;
static int scope_id = -2;
if (scope_id == -2)
@ -171,7 +282,7 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
}
}
void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
ASSERT(g_ray_model_state.frame.instances_count > 0);
DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas");
@ -199,7 +310,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
inst[i] = (VkAccelerationStructureInstanceKHR){
.instanceCustomIndex = instance->model->kusochki_offset,
.instanceShaderBindingTableRecordOffset = 0,
.accelerationStructureReference = getASAddress(instance->model->as), // TODO cache this addr
.accelerationStructureReference = getAccelAddress(instance->model->as), // TODO cache this addr
};
switch (instance->material_mode) {
case MATERIAL_MODE_OPAQUE:
@ -239,7 +350,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
R_VkStagingUnlock(headers_lock.handle);
}
g_accel_.stats.instances_count = g_ray_model_state.frame.instances_count;
g_accel.stats.instances_count = g_ray_model_state.frame.instances_count;
// Barrier for building all BLASes
// BLAS building is now in cmdbuf, need to synchronize with results
@ -261,6 +372,34 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
// 2. Build TLAS
createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
DEBUG_END(combuf->cmdbuf);
// 4. Barrier for TLAS build
{
const VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.buffer = g_accel.accels_buffer.buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
} };
vkCmdPipelineBarrier(combuf->cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
return (vk_resource_t){
.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
.value = (vk_descriptor_value_t){
.accel = (VkWriteDescriptorSetAccelerationStructureKHR) {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
.accelerationStructureCount = 1,
.pAccelerationStructures = &g_accel.tlas,
.pNext = NULL,
},
},
};
}
qboolean RT_VkAccelInit(void) {
@ -292,8 +431,8 @@ qboolean RT_VkAccelInit(void) {
g_accel.tlas_geom_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.tlas_geom_buffer.buffer);
R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_INSTANCES * 2);
R_SpeedsRegisterMetric(&g_accel_.stats.instances_count, "accels_instances_count", kSpeedsMetricCount);
R_SpeedsRegisterMetric(&g_accel_.stats.accels_built, "accels_built", kSpeedsMetricCount);
R_SpeedsRegisterMetric(&g_accel.stats.instances_count, "accels_instances_count", kSpeedsMetricCount);
R_SpeedsRegisterMetric(&g_accel.stats.accels_built, "accels_built", kSpeedsMetricCount);
return true;
}
@ -348,3 +487,183 @@ void RT_VkAccelNewMap(void) {
// Starts a new frame for the accel module: rewinds the per-frame scratch offset to zero,
// so the next scratch allocation starts at the beginning of the scratch buffer again.
void RT_VkAccelFrameBegin(void) {
g_accel.frame.scratch_offset = 0;
}
// Allocates an empty BLAS object for the given usage. No Vulkan object is created here;
// that happens in RT_BlasBuild().
// Only kBlasBuildStatic is supported so far: both dynamic usages trip an assert.
// Returns the new object with `usage` stored and `blas_size` set to -1.
struct rt_blas_s* RT_BlasCreate(rt_blas_usage_e usage) {
	rt_blas_t *const result = Mem_Calloc(vk_core.pool, sizeof(rt_blas_t));

	if (usage == kBlasBuildDynamicUpdate || usage == kBlasBuildDynamicFast) {
		ASSERT(!"Not implemented");
	}

	//result->kusochki_offset = -1;
	result->blas_size = -1;
	result->usage = usage;

	return result;
}
// Placeholder for creating a BLAS with pre-declared limits.
// Not implemented: it asserts unconditionally and returns NULL; none of the parameters are used.
// The `#if 0` section below is an unfinished draft that is excluded from compilation.
struct rt_blas_s* RT_BlasCreatePreallocated(rt_blas_usage_e usage, int max_geometries, const int *max_prims, int max_vertex, uint32_t extra_buffer_offset) {
ASSERT(!"Not implemented");
#if 0
switch (usage) {
case kBlasBuildStatic:
break;
case kBlasBuildDynamicUpdate:
ASSERT(!"Not implemented");
break;
case kBlasBuildDynamicFast:
ASSERT(!"Not implemented");
break;
}
VkAccelerationStructureGeometryKHR *geoms =
g_blas.default_geometry = (VkAccelerationStructureGeometryKHR)
{
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
.flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // TODO does this conflict with tlas building? With shaders arguments?
.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
.geometry.triangles =
(VkAccelerationStructureGeometryTrianglesDataKHR){
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
.indexType = VK_INDEX_TYPE_UINT16,
.maxVertex = mg->max_vertex,
.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
.vertexStride = sizeof(vk_vertex_t),
.vertexData.deviceAddress = buffer_addr,
.indexData.deviceAddress = buffer_addr,
},
};
#endif
// Nothing is created yet, so there is no object to hand back.
return NULL;
}
void RT_BlasDestroy(struct rt_blas_s* blas) {
if (!blas)
return;
/* if (blas->max_prims) */
/* Mem_Free(blas->max_prims); */
if (blas->blas)
vkDestroyAccelerationStructureKHR(vk_core.device, blas->blas, NULL);
Mem_Free(blas);
}
// Creates (if needed) and schedules the build of the acceleration structure for `blas`.
//
// blas: object from RT_BlasCreate(). Only kBlasBuildStatic usage is implemented;
//       the dynamic usages assert and return false.
// geoms: array of `geoms_count` render geometries. Vertex and index data are addressed
//        relative to the buffer returned by R_GeometryBuffer_Get().
// geoms_count: number of entries in `geoms`; must be positive.
//
// Returns true when the build command has been recorded. Returns false on invalid
// arguments, unsupported usage, failure to create the acceleration structure, or
// failure to reserve scratch space.
qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count) {
	// Reject a missing geometry array and non-positive counts up front: a negative count
	// would turn into a huge size in the allocations below, and `geoms` is dereferenced
	// in the fill loop.
	if (!blas || !geoms || geoms_count <= 0)
		return false;

	VkAccelerationStructureBuildGeometryInfoKHR build_info = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
		.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
		.geometryCount = geoms_count,
		.srcAccelerationStructure = VK_NULL_HANDLE,
	};

	switch (blas->usage) {
		case kBlasBuildStatic:
			// Static BLASes are built exactly once.
			ASSERT(!blas->blas);
			build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
			build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
			break;
		case kBlasBuildDynamicUpdate:
			ASSERT(!"Not implemented");
			return false;
			break;
		case kBlasBuildDynamicFast:
			ASSERT(!"Not implemented");
			return false;
			break;
	}

	const VkBuffer geometry_buffer = R_GeometryBuffer_Get();
	const VkDeviceAddress buffer_addr = R_VkBufferGetDeviceAddress(geometry_buffer);

	// Temporary per-geometry arrays; all three are released at `finalize`.
	VkAccelerationStructureGeometryKHR *const as_geoms = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*as_geoms));
	uint32_t *const max_prim_counts = Mem_Malloc(vk_core.pool, geoms_count * sizeof(*max_prim_counts));
	VkAccelerationStructureBuildRangeInfoKHR *const build_ranges = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*build_ranges));

	for (int i = 0; i < geoms_count; ++i) {
		const vk_render_geometry_t *mg = geoms + i;
		// Geometry is indexed triangles: three indices per primitive.
		const uint32_t prim_count = mg->element_count / 3;

		max_prim_counts[i] = prim_count;
		as_geoms[i] = (VkAccelerationStructureGeometryKHR)
			{
				.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
				.flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. incoming mode might have transparency eventually (and also dynamically)
				.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
				.geometry.triangles =
					(VkAccelerationStructureGeometryTrianglesDataKHR){
						.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
						.indexType = VK_INDEX_TYPE_UINT16,
						.maxVertex = mg->max_vertex,
						.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
						.vertexStride = sizeof(vk_vertex_t),
						// Both addresses point at the buffer start; the per-geometry
						// position is given through the build range below.
						.vertexData.deviceAddress = buffer_addr,
						.indexData.deviceAddress = buffer_addr,
					},
			};

		build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) {
			.primitiveCount = prim_count,
			.primitiveOffset = mg->index_offset * sizeof(uint16_t),
			.firstVertex = mg->vertex_offset,
		};
	}

	build_info.pGeometries = as_geoms;

	const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, max_prim_counts);

	qboolean retval = false;

	// allocate blas
	if (!blas->blas) {
		blas->blas = createAccel("FIXME NAME", VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, build_size.accelerationStructureSize);
		if (!blas->blas)
			goto finalize;

		blas->blas_addr = getAccelAddress(blas->blas);
		blas->blas_size = build_size.accelerationStructureSize;
		blas->max_geoms = build_info.geometryCount;
		// TODO handle lifetime blas->max_prim_counts = max_prim_counts;
	}

	// Build
	// NOTE(review): if buildAccel() fails, the acceleration structure created above stays
	// attached to `blas` without having been built — callers should treat `false` as
	// "this blas is not usable".
	build_info.dstAccelerationStructure = blas->blas;
	if (!buildAccel(geometry_buffer, &build_info, &build_size, build_ranges))
		goto finalize;

	retval = true;

	// do kusochki?

finalize:
	Mem_Free(as_geoms);
	Mem_Free(max_prim_counts);
	Mem_Free(build_ranges);
	return retval;
}
// Refreshes material data for a subset of geometries (animated materials).
// Not implemented yet: the body only asserts and the arguments are unused.
void RT_BlasUpdateMaterialsSubset(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms[], const int *geoms_indices, int geoms_indices_count) {
	(void)blas;
	(void)geoms;
	(void)geoms_indices;
	(void)geoms_indices_count;

	ASSERT(!"Not implemented");
}
// Clone materials with different base_color texture (sprites)
uint32_t RT_BlasOverrideMaterial(struct rt_blas_s *blas, int texture) {
ASSERT(!"Not implemented");
return -1;
}

View File

@ -2,44 +2,45 @@
#include "vk_core.h"
#include "vk_buffer.h"
struct rt_vk_ray_accel_s {
// Stores AS built data. Lifetime similar to render buffer:
// - some portion lives for entire map lifetime
// - some portion lives only for a single frame (may have several frames in flight)
// TODO: unify this with render buffer
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t accels_buffer;
struct alo_pool_s *accels_buffer_alloc;
// Temp: lives only during a single frame (may have many in flight)
// Used for building ASes;
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t scratch_buffer;
VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;
// Temp-ish: used for making TLAS, contains addresses of all used BLASes
// Lifetime and nature of usage similar to scratch_buffer
// TODO: unify them
// Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY
vk_buffer_t tlas_geom_buffer;
VkDeviceAddress tlas_geom_buffer_addr;
r_flipping_buffer_t tlas_geom_buffer_alloc;
// TODO need several TLASes for N frames in flight
VkAccelerationStructureKHR tlas;
// Per-frame data that is accumulated between RayFrameBegin and End calls
struct {
uint32_t scratch_offset; // for building dynamic blases
} frame;
};
extern struct rt_vk_ray_accel_s g_accel;
#include "vk_math.h"
#include "ray_resources.h"
qboolean RT_VkAccelInit(void);
void RT_VkAccelShutdown(void);
void RT_VkAccelNewMap(void);
struct rt_blas_s;
struct vk_render_geometry_s;
// How a BLAS is going to be (re)built; passed to RT_BlasCreate() and RT_BlasCreatePreallocated().
typedef enum {
kBlasBuildStatic, // builds slow for fast trace
kBlasBuildDynamicUpdate, // builds if not built, updates if built
kBlasBuildDynamicFast, // builds fast from scratch (no correlation with previous frame guaranteed, e.g. triapi)
} rt_blas_usage_e;
// Just creates an empty BLAS structure, doesn't alloc anything
struct rt_blas_s* RT_BlasCreate(rt_blas_usage_e usage);
// Create an empty BLAS with specified limits
struct rt_blas_s* RT_BlasCreatePreallocated(rt_blas_usage_e usage, int max_geometries, const int *max_prims, int max_vertex, uint32_t extra_buffer_offset);
void RT_BlasDestroy(struct rt_blas_s* blas);
// 1. Schedules BLAS build (allocates geoms+ranges from a temp pool, etc).
// 2. Allocates kusochki (if not) and fills them with geom and initial material data
qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count);
// Arguments for RT_VkAccelFrameAddBlas(); the struct itself is passed by value.
typedef struct {
// BLAS to add to the frame.
const struct rt_blas_s* blas;
// presumably one of the MATERIAL_MODE_* values — TODO confirm
int material_mode;
// Current and previous transforms, passed by pointer.
// NOTE(review): how long the pointed-to data must stay valid is not visible here — confirm.
matrix3x4 *transform, *prev_transform;
// Color, passed by pointer (same lifetime question as the transforms).
vec4_t *color;
// presumably a value obtained from a material override call — TODO confirm
uint32_t material_override;
} rt_frame_add_model_args_t;
void RT_VkAccelFrameBegin(void);
void RT_VkAccelFrameAddBlas( rt_frame_add_model_args_t args );
struct vk_combuf_s;
void RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf);
vk_resource_t RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf);

View File

@ -24,10 +24,6 @@ typedef struct vk_ray_model_s {
uint32_t material_mode;
vec4_t color;
matrix4x4 prev_transform;
struct {
uint32_t as_offset;
} debug;
} vk_ray_model_t;
typedef struct Kusok vk_kusok_data_t;
@ -52,6 +48,9 @@ typedef struct {
struct vk_combuf_s;
qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args, vk_ray_model_t *model);
#define MAX_SCRATCH_BUFFER (32*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
typedef struct {
// Geometry metadata. Lifetime is similar to geometry lifetime itself.
// Semantically close to render buffer (describes layout for those objects)

View File

@ -278,7 +278,7 @@ static void computeConveyorSpeed(const color24 rendercolor, int tex_index, vec2_
}
// TODO utilize uploadKusochki([1]) to avoid 2 copies of staging code
static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, uint32_t material_mode, const int *geom_indexes, int geom_indexes_count) {
static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, const int *geom_indexes, int geom_indexes_count) {
// TODO can we sort all animated geometries (in brush) to have only a single range here?
for (int i = 0; i < geom_indexes_count; ++i) {
const int index = geom_indexes[i];
@ -314,7 +314,7 @@ static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk
return true;
}
static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, uint32_t material_mode) {
static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_render_model_t *const render_model) {
const vk_staging_buffer_args_t staging_args = {
.buffer = g_ray_model_state.kusochki_buffer.buffer,
.offset = model->kusochki_offset * sizeof(vk_kusok_data_t),
@ -399,11 +399,11 @@ void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render
model->material_mode = material_mode;
Vector4Copy(render_model->color, model->color);
Matrix4x4_Copy(model->prev_transform, render_model->prev_transform);
if (!uploadKusochki(model, render_model, material_mode)) {
if (!uploadKusochki(model, render_model)) {
return;
}
} else {
if (!uploadKusochkiSubset(model, render_model, material_mode, render_model->geometries_changed, render_model->geometries_changed_count))
if (!uploadKusochkiSubset(model, render_model, render_model->geometries_changed, render_model->geometries_changed_count))
return;
}

View File

@ -170,18 +170,6 @@ typedef struct {
static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* args) {
const VkCommandBuffer cmdbuf = combuf->cmdbuf;
// TODO move this to "TLAS producer"
g_rtx.res[ExternalResource_tlas].resource = (vk_resource_t){
.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
.value = (vk_descriptor_value_t){
.accel = (VkWriteDescriptorSetAccelerationStructureKHR) {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
.accelerationStructureCount = 1,
.pAccelerationStructures = &g_accel.tlas,
.pNext = NULL,
},
},
};
#define RES_SET_BUFFER(name, type_, source_, offset_, size_) \
g_rtx.res[ExternalResource_##name].resource = (vk_resource_t){ \
@ -278,24 +266,11 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a
}
DEBUG_BEGIN(cmdbuf, "yay tracing");
RT_VkAccelPrepareTlas(combuf);
prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);
// 4. Barrier for TLAS build
{
const VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.buffer = g_accel.accels_buffer.buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
} };
vkCmdPipelineBarrier(cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
// TODO move this to "TLAS producer"
g_rtx.res[ExternalResource_tlas].resource = RT_VkAccelPrepareTlas(combuf);
prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);
{ // FIXME this should be done automatically inside meatpipe, TODO
//const uint32_t size = sizeof(struct Lights);

View File

@ -52,3 +52,17 @@ void VK_RayMapLoadEnd( void );
qboolean VK_RayInit( void );
void VK_RayShutdown( void );
// Ray tracing model: a BLAS paired with its kusochki location.
typedef struct rt_model_s {
// BLAS this model refers to; read-only through this struct.
const struct rt_blas_s *blas;
// presumably the offset of this model's first kusok in the kusochki buffer — TODO confirm units
uint32_t kusochki_offset;
} rt_model_t;
struct vk_render_geometry_s;
void RT_ModelUploadKusochki(rt_model_t *model, const struct vk_render_geometry_s *geoms[], int geoms_count);
// Update animated materials
struct vk_render_geometry_s;
void RT_ModelUpdateMaterialsSubset(rt_model_t *model, const struct vk_render_geometry_s *geoms[], const int *geoms_indices, int geoms_indices_count);
// Clone materials with different base_color texture (sprites)
void RT_ModelOverrideMaterial(struct rt_blas_s *blas, int texture);