rt: start refactoring blas/tlas mgmt code
Draft the new accel/blas apis. Consolidate everything accel-related into vk_ray_accel.c. Start splitting into more atomic functions. Prepare for blas-model+kusochki split. etc etc. The new code isn't really used yet.
This commit is contained in:
parent
4af9f65cd0
commit
b65f84793a
|
@ -168,31 +168,8 @@ Exposed ops:
|
|||
- build vs update
|
||||
- Add to TLAS w/ color/xform/mmode/...
|
||||
|
||||
// just creates BLAS, doesn't alloc anything
|
||||
rt_blas_t RT_BlasAlloc(int max_geometries);
|
||||
|
||||
typedef enum {
|
||||
kBlasBuildStatic, // builds slow for fast trace
|
||||
kBlasBuildDynamicUpdate, // builds if not built, updates if built
|
||||
kBlasBuildDynamicFast, // builds fast from scratch (no correlation with previous frame guaranteed, e.g. triapi)
|
||||
} rt_blas_build_mode_e;
|
||||
|
||||
// 1. Schedules BLAS build (allocates geoms+ranges from a temp pool, etc).
|
||||
// 2. Allocates kusochki (if not) and fills them with geom and initial material data
|
||||
void RT_BlasBuild(rt_blas_t *blas, const vk_render_geometry_t *geoms[], int geoms_count, rt_blas_build_mode_e mode);
|
||||
|
||||
// Update animated kusochki
|
||||
void RT_BlasUpdateMaterialsSubset(rt_blas_t *blas...);
|
||||
|
||||
// Clone kusochki with different base_color texture (sprites)
|
||||
uint32_t RT_BlasOverrideMaterial(rt_blas_t *blas, int texture);
|
||||
|
||||
struct rt_blas_s;
|
||||
typedef struct {
|
||||
const struct rt_blas_s* model;
|
||||
int material_mode;
|
||||
matrix3x4 transform, prev_transform;
|
||||
vec4_t color;
|
||||
uint32_t material_override;
|
||||
} rt_frame_add_model_args_t;
|
||||
void RT_FrameAddBlasInstance( rt_frame_add_model_args_t args );
|
||||
- geometry_buffer -- vtx+idx static + multi-frame dynamic + single-frame dynamic
|
||||
- kusochki_buffer -- kusok[] static + dynamic + clone_dynamic
|
||||
- accel_buffer -- static + multiframe dynamic + single-frame dynamic
|
||||
- scratch_buffer - single-frame dynamic
|
||||
- model_buffer - single-frame dynamic
|
||||
|
|
|
@ -7,22 +7,97 @@
|
|||
#include "vk_combuf.h"
|
||||
#include "vk_staging.h"
|
||||
#include "vk_math.h"
|
||||
#include "vk_geometry.h"
|
||||
#include "vk_render.h"
|
||||
|
||||
#include "xash3d_mathlib.h"
|
||||
|
||||
#define MAX_SCRATCH_BUFFER (32*1024*1024)
|
||||
#define MAX_ACCELS_BUFFER (64*1024*1024)
|
||||
#ifndef ARRAYSIZE
|
||||
#define ARRAYSIZE(p) (sizeof(p)/sizeof(p[0]))
|
||||
#endif // #ifndef ARRAYSIZE
|
||||
|
||||
struct rt_vk_ray_accel_s g_accel = {0};
|
||||
typedef struct rt_blas_s {
|
||||
rt_blas_usage_e usage;
|
||||
|
||||
VkAccelerationStructureKHR blas;
|
||||
VkDeviceAddress blas_addr;
|
||||
|
||||
int max_geoms;
|
||||
//uint32_t *max_prim_counts;
|
||||
int blas_size;
|
||||
} rt_blas_t;
|
||||
|
||||
static struct {
|
||||
// Stores AS built data. Lifetime similar to render buffer:
|
||||
// - some portion lives for entire map lifetime
|
||||
// - some portion lives only for a single frame (may have several frames in flight)
|
||||
// TODO: unify this with render buffer
|
||||
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
|
||||
vk_buffer_t accels_buffer;
|
||||
struct alo_pool_s *accels_buffer_alloc;
|
||||
|
||||
// Temp: lives only during a single frame (may have many in flight)
|
||||
// Used for building ASes;
|
||||
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
|
||||
vk_buffer_t scratch_buffer;
|
||||
VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;
|
||||
|
||||
// Temp-ish: used for making TLAS, contains addresses of all used BLASes
|
||||
// Lifetime and nature of usage similar to scratch_buffer
|
||||
// TODO: unify them
|
||||
// Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY
|
||||
vk_buffer_t tlas_geom_buffer;
|
||||
VkDeviceAddress tlas_geom_buffer_addr;
|
||||
r_flipping_buffer_t tlas_geom_buffer_alloc;
|
||||
|
||||
// TODO need several TLASes for N frames in flight
|
||||
VkAccelerationStructureKHR tlas;
|
||||
|
||||
// Per-frame data that is accumulated between RayFrameBegin and End calls
|
||||
struct {
|
||||
uint32_t scratch_offset; // for building dynamic blases
|
||||
} frame;
|
||||
|
||||
struct {
|
||||
int instances_count;
|
||||
int accels_built;
|
||||
} stats;
|
||||
} g_accel_;
|
||||
} g_accel;
|
||||
|
||||
static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
|
||||
static VkAccelerationStructureBuildSizesInfoKHR getAccelSizes(const VkAccelerationStructureBuildGeometryInfoKHR *build_info, const uint32_t *max_prim_counts) {
|
||||
VkAccelerationStructureBuildSizesInfoKHR build_size = {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
|
||||
};
|
||||
|
||||
vkGetAccelerationStructureBuildSizesKHR(
|
||||
vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, build_info, max_prim_counts, &build_size);
|
||||
|
||||
return build_size;
|
||||
}
|
||||
|
||||
// Sub-allocates `size` bytes from the shared accels buffer pool and creates a
// Vulkan acceleration structure handle inside that block.
// name: debug label attached to the AS object.
// type: BLAS or TLAS (VkAccelerationStructureTypeKHR).
// Returns VK_NULL_HANDLE if the pool allocation fails (error is logged).
// NOTE(review): the pool block is not tracked here, so it cannot be returned
// to the pool when the AS is destroyed — confirm intended lifetime.
static VkAccelerationStructureKHR createAccel(const char *name, VkAccelerationStructureTypeKHR type, uint32_t size) {
	// TODO why 256? presumably the required AS buffer offset alignment -- confirm
	const alo_block_t block = aloPoolAllocate(g_accel.accels_buffer_alloc, size, /*align=*/256);

	if (block.offset == ALO_ALLOC_FAILED) {
		// Fixed typo: "allocated" -> "allocate"
		gEngine.Con_Printf(S_ERROR "Failed to allocate %u bytes for blas \"%s\"\n", size, name);
		return VK_NULL_HANDLE;
	}

	const VkAccelerationStructureCreateInfoKHR asci = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
		.buffer = g_accel.accels_buffer.buffer,
		.offset = block.offset,
		.type = type,
		.size = size,
	};

	VkAccelerationStructureKHR accel = VK_NULL_HANDLE;
	XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, &accel));
	SET_DEBUG_NAME(accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, name);
	return accel;
}
|
||||
|
||||
static VkDeviceAddress getAccelAddress(VkAccelerationStructureKHR as) {
|
||||
VkAccelerationStructureDeviceAddressInfoKHR asdai = {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
|
||||
.accelerationStructure = as,
|
||||
|
@ -30,6 +105,57 @@ static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
|
|||
return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai);
|
||||
}
|
||||
|
||||
// Records the commands to build one acceleration structure into the staging
// command buffer:
//  1. Barrier: geometry uploads (transfer writes) must finish before the AS
//     build (and shaders) read the geometry buffer.
//  2. Sub-allocates scratch space for this build from the per-frame scratch
//     buffer cursor (g_accel.frame.scratch_offset).
//  3. Records vkCmdBuildAccelerationStructuresKHR wrapped in a GPU timing scope.
// Returns false (and builds nothing) if the frame's scratch budget is exhausted.
static qboolean buildAccel(VkBuffer geometry_buffer, VkAccelerationStructureBuildGeometryInfoKHR *build_info, const VkAccelerationStructureBuildSizesInfoKHR *build_size, const VkAccelerationStructureBuildRangeInfoKHR *build_ranges) {
	// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
	vk_combuf_t *const combuf = R_VkStagingCommit();
	{
		// Make transfer writes to the geometry buffer visible to AS builds and
		// ray-tracing shader reads.
		const VkBufferMemoryBarrier bmb[] = { {
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
			//.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, // FIXME
			.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, // FIXME
			.buffer = geometry_buffer,
			.offset = 0, // FIXME
			.size = VK_WHOLE_SIZE, // FIXME
		} };
		vkCmdPipelineBarrier(combuf->cmdbuf,
			VK_PIPELINE_STAGE_TRANSFER_BIT,
			//VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
			0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
	}

	// build blas
	const uint32_t scratch_buffer_size = build_size->buildScratchSize; // TODO vs build_size.updateScratchSize

	// Reject the build rather than overflow the fixed-size scratch buffer.
	if (MAX_SCRATCH_BUFFER < g_accel.frame.scratch_offset + scratch_buffer_size) {
		gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
			MAX_SCRATCH_BUFFER - g_accel.frame.scratch_offset,
			scratch_buffer_size);
		return false;
	}

	build_info->scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset;

	//uint32_t scratch_offset_initial = g_accel.frame.scratch_offset;
	// Advance the per-frame scratch cursor and align it up so the *next*
	// build's scratch address satisfies the device's
	// minAccelerationStructureScratchOffsetAlignment requirement.
	g_accel.frame.scratch_offset += scratch_buffer_size;
	g_accel.frame.scratch_offset = ALIGN_UP(g_accel.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment);

	//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);

	g_accel.stats.accels_built++;

	// Lazily register the GPU profiling scope once; -2 means "not yet registered".
	static int scope_id = -2;
	if (scope_id == -2)
		scope_id = R_VkGpuScope_Register("build_as");
	const int begin_index = R_VkCombufScopeBegin(combuf, scope_id);
	const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = build_ranges;
	vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, build_info, &p_build_ranges);
	R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);

	return true;
}
|
||||
|
||||
// TODO split this into smaller building blocks in a separate module
|
||||
qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args, vk_ray_model_t *model) {
|
||||
qboolean should_create = *args->p_accel == VK_NULL_HANDLE;
|
||||
|
@ -82,28 +208,13 @@ qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build
|
|||
}
|
||||
|
||||
if (should_create) {
|
||||
const uint32_t as_size = build_size.accelerationStructureSize;
|
||||
const alo_block_t block = aloPoolAllocate(g_accel.accels_buffer_alloc, as_size, /*TODO why? align=*/256);
|
||||
const uint32_t buffer_offset = block.offset;
|
||||
const VkAccelerationStructureCreateInfoKHR asci = {
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
|
||||
.buffer = g_accel.accels_buffer.buffer,
|
||||
.offset = buffer_offset,
|
||||
.type = args->type,
|
||||
.size = as_size,
|
||||
};
|
||||
*args->p_accel = createAccel(args->debug_name, args->type, build_size.accelerationStructureSize);
|
||||
|
||||
if (buffer_offset == ALO_ALLOC_FAILED) {
|
||||
gEngine.Con_Printf(S_ERROR "Failed to allocated %u bytes for accel buffer\n", (uint32_t)asci.size);
|
||||
if (!args->p_accel)
|
||||
return false;
|
||||
}
|
||||
|
||||
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->p_accel));
|
||||
SET_DEBUG_NAME(*args->p_accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, args->debug_name);
|
||||
|
||||
if (model) {
|
||||
model->size = asci.size;
|
||||
model->debug.as_offset = buffer_offset;
|
||||
model->size = build_size.accelerationStructureSize;
|
||||
}
|
||||
|
||||
// gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size);
|
||||
|
@ -125,7 +236,7 @@ qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build
|
|||
|
||||
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
|
||||
|
||||
g_accel_.stats.accels_built++;
|
||||
g_accel.stats.accels_built++;
|
||||
|
||||
static int scope_id = -2;
|
||||
if (scope_id == -2)
|
||||
|
@ -171,7 +282,7 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
|
|||
}
|
||||
}
|
||||
|
||||
void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
|
||||
vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
|
||||
ASSERT(g_ray_model_state.frame.instances_count > 0);
|
||||
DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas");
|
||||
|
||||
|
@ -199,7 +310,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
|
|||
inst[i] = (VkAccelerationStructureInstanceKHR){
|
||||
.instanceCustomIndex = instance->model->kusochki_offset,
|
||||
.instanceShaderBindingTableRecordOffset = 0,
|
||||
.accelerationStructureReference = getASAddress(instance->model->as), // TODO cache this addr
|
||||
.accelerationStructureReference = getAccelAddress(instance->model->as), // TODO cache this addr
|
||||
};
|
||||
switch (instance->material_mode) {
|
||||
case MATERIAL_MODE_OPAQUE:
|
||||
|
@ -239,7 +350,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
|
|||
R_VkStagingUnlock(headers_lock.handle);
|
||||
}
|
||||
|
||||
g_accel_.stats.instances_count = g_ray_model_state.frame.instances_count;
|
||||
g_accel.stats.instances_count = g_ray_model_state.frame.instances_count;
|
||||
|
||||
// Barrier for building all BLASes
|
||||
// BLAS building is now in cmdbuf, need to synchronize with results
|
||||
|
@ -261,6 +372,34 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
|
|||
// 2. Build TLAS
|
||||
createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
|
||||
DEBUG_END(combuf->cmdbuf);
|
||||
|
||||
// 4. Barrier for TLAS build
|
||||
{
|
||||
const VkBufferMemoryBarrier bmb[] = { {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
|
||||
.buffer = g_accel.accels_buffer.buffer,
|
||||
.offset = 0,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
} };
|
||||
vkCmdPipelineBarrier(combuf->cmdbuf,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
|
||||
}
|
||||
|
||||
return (vk_resource_t){
|
||||
.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
|
||||
.value = (vk_descriptor_value_t){
|
||||
.accel = (VkWriteDescriptorSetAccelerationStructureKHR) {
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
|
||||
.accelerationStructureCount = 1,
|
||||
.pAccelerationStructures = &g_accel.tlas,
|
||||
.pNext = NULL,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
qboolean RT_VkAccelInit(void) {
|
||||
|
@ -292,8 +431,8 @@ qboolean RT_VkAccelInit(void) {
|
|||
g_accel.tlas_geom_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.tlas_geom_buffer.buffer);
|
||||
R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_INSTANCES * 2);
|
||||
|
||||
R_SpeedsRegisterMetric(&g_accel_.stats.instances_count, "accels_instances_count", kSpeedsMetricCount);
|
||||
R_SpeedsRegisterMetric(&g_accel_.stats.accels_built, "accels_built", kSpeedsMetricCount);
|
||||
R_SpeedsRegisterMetric(&g_accel.stats.instances_count, "accels_instances_count", kSpeedsMetricCount);
|
||||
R_SpeedsRegisterMetric(&g_accel.stats.accels_built, "accels_built", kSpeedsMetricCount);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -348,3 +487,183 @@ void RT_VkAccelNewMap(void) {
|
|||
// Called at the start of a frame: resets the per-frame scratch allocation
// cursor so this frame's BLAS builds allocate from the start of the scratch buffer.
void RT_VkAccelFrameBegin(void) {
	g_accel.frame.scratch_offset = 0;
}
|
||||
|
||||
// Creates an empty BLAS wrapper with the given build usage mode.
// No Vulkan AS is created and no device memory is allocated here; that happens
// lazily on the first RT_BlasBuild. Only kBlasBuildStatic is supported so far —
// the dynamic modes assert.
struct rt_blas_s* RT_BlasCreate(rt_blas_usage_e usage) {
	rt_blas_t *const blas = Mem_Calloc(vk_core.pool, sizeof(*blas));

	// Dynamic build modes are not implemented yet.
	if (usage == kBlasBuildDynamicUpdate || usage == kBlasBuildDynamicFast)
		ASSERT(!"Not implemented");

	blas->usage = usage;
	blas->blas_size = -1; // unknown until first build

	return blas;
}
|
||||
|
||||
// Stub: supposed to create a BLAS with preallocated storage sized from the
// given limits (max geometries / primitives / vertices), so later builds can
// reuse it without new allocations. Not implemented yet — always asserts.
// The #if 0 block below is a sketch of the intended implementation and does
// not compile (it references g_blas/mg/buffer_addr that are not in scope).
struct rt_blas_s* RT_BlasCreatePreallocated(rt_blas_usage_e usage, int max_geometries, const int *max_prims, int max_vertex, uint32_t extra_buffer_offset) {
	ASSERT(!"Not implemented");

#if 0
	switch (usage) {
		case kBlasBuildStatic:
			break;
		case kBlasBuildDynamicUpdate:
			ASSERT(!"Not implemented");
			break;
		case kBlasBuildDynamicFast:
			ASSERT(!"Not implemented");
			break;
	}

	VkAccelerationStructureGeometryKHR *geoms =

	g_blas.default_geometry = (VkAccelerationStructureGeometryKHR)
	{
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
		.flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // TODO does this conflict with tlas building? With shaders arguments?
		.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
		.geometry.triangles =
			(VkAccelerationStructureGeometryTrianglesDataKHR){
				.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
				.indexType = VK_INDEX_TYPE_UINT16,
				.maxVertex = mg->max_vertex,
				.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
				.vertexStride = sizeof(vk_vertex_t),
				.vertexData.deviceAddress = buffer_addr,
				.indexData.deviceAddress = buffer_addr,
			},
	};
#endif

	return NULL;
}
|
||||
|
||||
void RT_BlasDestroy(struct rt_blas_s* blas) {
|
||||
if (!blas)
|
||||
return;
|
||||
|
||||
/* if (blas->max_prims) */
|
||||
/* Mem_Free(blas->max_prims); */
|
||||
|
||||
if (blas->blas)
|
||||
vkDestroyAccelerationStructureKHR(vk_core.device, blas->blas, NULL);
|
||||
|
||||
Mem_Free(blas);
|
||||
}
|
||||
|
||||
// Builds (or lazily creates, then builds) the acceleration structure for a BLAS
// from an array of render geometries:
//  - Fills VkAccelerationStructureGeometryKHR + build range info per geometry
//    (triangles, uint16 indices, vertices/indices both in the shared geometry buffer).
//  - Queries required sizes, creates the AS on first build, then records the
//    build via buildAccel().
// Returns false on invalid args, AS allocation failure, or scratch overflow.
// NOTE: only kBlasBuildStatic is implemented; dynamic modes assert and bail.
qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count) {
	if (!blas || !geoms_count)
		return false;

	VkAccelerationStructureBuildGeometryInfoKHR build_info = {
		.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
		.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR,
		.geometryCount = geoms_count,
		.srcAccelerationStructure = VK_NULL_HANDLE,
	};

	// Pick build mode/flags from the usage declared at creation time.
	switch (blas->usage) {
		case kBlasBuildStatic:
			// Static BLASes are built exactly once.
			ASSERT(!blas->blas);
			build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
			build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
			break;
		case kBlasBuildDynamicUpdate:
			ASSERT(!"Not implemented");
			return false;
			break;
		case kBlasBuildDynamicFast:
			ASSERT(!"Not implemented");
			return false;
			break;
	}

	const VkBuffer geometry_buffer = R_GeometryBuffer_Get();
	const VkDeviceAddress buffer_addr = R_VkBufferGetDeviceAddress(geometry_buffer);

	// Temporary per-build arrays; freed at finalize.
	VkAccelerationStructureGeometryKHR *const as_geoms = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*as_geoms));
	uint32_t *const max_prim_counts = Mem_Malloc(vk_core.pool, geoms_count * sizeof(*max_prim_counts));
	VkAccelerationStructureBuildRangeInfoKHR *const build_ranges = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*build_ranges));

	for (int i = 0; i < geoms_count; ++i) {
		const vk_render_geometry_t *mg = geoms + i;
		// Triangle list: 3 indices per primitive.
		const uint32_t prim_count = mg->element_count / 3;

		max_prim_counts[i] = prim_count;
		as_geoms[i] = (VkAccelerationStructureGeometryKHR)
		{
			.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
			.flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. incoming mode might have transparency eventually (and also dynamically)
			.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
			.geometry.triangles =
				(VkAccelerationStructureGeometryTrianglesDataKHR){
					.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
					.indexType = VK_INDEX_TYPE_UINT16,
					.maxVertex = mg->max_vertex,
					.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
					.vertexStride = sizeof(vk_vertex_t),
					.vertexData.deviceAddress = buffer_addr,
					.indexData.deviceAddress = buffer_addr,
				},
		};

		// Offsets locate this geometry's slice within the shared geometry buffer.
		build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) {
			.primitiveCount = prim_count,
			.primitiveOffset = mg->index_offset * sizeof(uint16_t),
			.firstVertex = mg->vertex_offset,
		};
	}

	build_info.pGeometries = as_geoms;

	const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, max_prim_counts);

	qboolean retval = false;

	// allocate blas (first build only; later builds would reuse the handle)
	if (!blas->blas) {
		blas->blas = createAccel("FIXME NAME", VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, build_size.accelerationStructureSize);
		if (!blas->blas)
			goto finalize;

		blas->blas_addr = getAccelAddress(blas->blas);
		blas->blas_size = build_size.accelerationStructureSize;
		blas->max_geoms = build_info.geometryCount;
		// TODO handle lifetime blas->max_prim_counts = max_prim_counts;
	}

	// Build
	build_info.dstAccelerationStructure = blas->blas;
	if (!buildAccel(geometry_buffer, &build_info, &build_size, build_ranges))
		goto finalize;

	retval = true;

	// do kusochki?

finalize:
	// Free temporaries on both success and failure paths.
	Mem_Free(as_geoms);
	Mem_Free(max_prim_counts);
	Mem_Free(build_ranges);
	return retval;
}
|
||||
|
||||
// Update animated materials
// Stub: intended to re-upload material data (kusochki) for the given subset of
// geometries of an existing BLAS. Not implemented yet — always asserts.
void RT_BlasUpdateMaterialsSubset(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms[], const int *geoms_indices, int geoms_indices_count) {
	ASSERT(!"Not implemented");
}
|
||||
|
||||
// Clone materials with different base_color texture (sprites)
// Stub: not implemented yet — always asserts.
// Returns -1 (converted to UINT32_MAX) as an invalid-override sentinel.
uint32_t RT_BlasOverrideMaterial(struct rt_blas_s *blas, int texture) {
	ASSERT(!"Not implemented");
	return -1;
}
|
||||
|
|
|
@ -2,44 +2,45 @@
|
|||
|
||||
#include "vk_core.h"
|
||||
#include "vk_buffer.h"
|
||||
|
||||
// Shared state of the acceleration-structure module; the single instance
// g_accel is defined in vk_ray_accel.c and exposed to other rt modules.
struct rt_vk_ray_accel_s {
	// Stores AS built data. Lifetime similar to render buffer:
	// - some portion lives for entire map lifetime
	// - some portion lives only for a single frame (may have several frames in flight)
	// TODO: unify this with render buffer
	// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
	vk_buffer_t accels_buffer;
	struct alo_pool_s *accels_buffer_alloc;

	// Temp: lives only during a single frame (may have many in flight)
	// Used for building ASes;
	// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
	vk_buffer_t scratch_buffer;
	VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;

	// Temp-ish: used for making TLAS, contains addresses of all used BLASes
	// Lifetime and nature of usage similar to scratch_buffer
	// TODO: unify them
	// Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY
	vk_buffer_t tlas_geom_buffer;
	VkDeviceAddress tlas_geom_buffer_addr;
	r_flipping_buffer_t tlas_geom_buffer_alloc;

	// TODO need several TLASes for N frames in flight
	VkAccelerationStructureKHR tlas;

	// Per-frame data that is accumulated between RayFrameBegin and End calls
	struct {
		uint32_t scratch_offset; // for building dynamic blases
	} frame;
};
|
||||
|
||||
extern struct rt_vk_ray_accel_s g_accel;
|
||||
#include "vk_math.h"
|
||||
#include "ray_resources.h"
|
||||
|
||||
qboolean RT_VkAccelInit(void);
|
||||
void RT_VkAccelShutdown(void);
|
||||
|
||||
void RT_VkAccelNewMap(void);
|
||||
|
||||
struct rt_blas_s;
|
||||
struct vk_render_geometry_s;
|
||||
|
||||
// How a BLAS is expected to be (re)built; chosen once at creation time.
typedef enum {
	kBlasBuildStatic, // builds slow for fast trace
	kBlasBuildDynamicUpdate, // builds if not built, updates if built
	kBlasBuildDynamicFast, // builds fast from scratch (no correlation with previous frame guaranteed, e.g. triapi)
} rt_blas_usage_e;
|
||||
|
||||
// Just creates an empty BLAS structure, doesn't alloc anything
|
||||
struct rt_blas_s* RT_BlasCreate(rt_blas_usage_e usage);
|
||||
|
||||
// Create an empty BLAS with specified limits
|
||||
struct rt_blas_s* RT_BlasCreatePreallocated(rt_blas_usage_e usage, int max_geometries, const int *max_prims, int max_vertex, uint32_t extra_buffer_offset);
|
||||
|
||||
void RT_BlasDestroy(struct rt_blas_s* blas);
|
||||
|
||||
// 1. Schedules BLAS build (allocates geoms+ranges from a temp pool, etc).
|
||||
// 2. Allocates kusochki (if not) and fills them with geom and initial material data
|
||||
qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count);
|
||||
|
||||
typedef struct {
|
||||
const struct rt_blas_s* blas;
|
||||
int material_mode;
|
||||
matrix3x4 *transform, *prev_transform;
|
||||
vec4_t *color;
|
||||
uint32_t material_override;
|
||||
} rt_frame_add_model_args_t;
|
||||
|
||||
void RT_VkAccelFrameBegin(void);
|
||||
void RT_VkAccelFrameAddBlas( rt_frame_add_model_args_t args );
|
||||
|
||||
struct vk_combuf_s;
|
||||
void RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf);
|
||||
vk_resource_t RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf);
|
||||
|
|
|
@ -24,10 +24,6 @@ typedef struct vk_ray_model_s {
|
|||
uint32_t material_mode;
|
||||
vec4_t color;
|
||||
matrix4x4 prev_transform;
|
||||
|
||||
struct {
|
||||
uint32_t as_offset;
|
||||
} debug;
|
||||
} vk_ray_model_t;
|
||||
|
||||
typedef struct Kusok vk_kusok_data_t;
|
||||
|
@ -52,6 +48,9 @@ typedef struct {
|
|||
struct vk_combuf_s;
|
||||
qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args, vk_ray_model_t *model);
|
||||
|
||||
#define MAX_SCRATCH_BUFFER (32*1024*1024)
|
||||
#define MAX_ACCELS_BUFFER (64*1024*1024)
|
||||
|
||||
typedef struct {
|
||||
// Geometry metadata. Lifetime is similar to geometry lifetime itself.
|
||||
// Semantically close to render buffer (describes layout for those objects)
|
||||
|
|
|
@ -278,7 +278,7 @@ static void computeConveyorSpeed(const color24 rendercolor, int tex_index, vec2_
|
|||
}
|
||||
|
||||
// TODO utilize uploadKusochki([1]) to avoid 2 copies of staging code
|
||||
static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, uint32_t material_mode, const int *geom_indexes, int geom_indexes_count) {
|
||||
static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, const int *geom_indexes, int geom_indexes_count) {
|
||||
// TODO can we sort all animated geometries (in brush) to have only a single range here?
|
||||
for (int i = 0; i < geom_indexes_count; ++i) {
|
||||
const int index = geom_indexes[i];
|
||||
|
@ -314,7 +314,7 @@ static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk
|
|||
return true;
|
||||
}
|
||||
|
||||
static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, uint32_t material_mode) {
|
||||
static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_render_model_t *const render_model) {
|
||||
const vk_staging_buffer_args_t staging_args = {
|
||||
.buffer = g_ray_model_state.kusochki_buffer.buffer,
|
||||
.offset = model->kusochki_offset * sizeof(vk_kusok_data_t),
|
||||
|
@ -399,11 +399,11 @@ void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render
|
|||
model->material_mode = material_mode;
|
||||
Vector4Copy(render_model->color, model->color);
|
||||
Matrix4x4_Copy(model->prev_transform, render_model->prev_transform);
|
||||
if (!uploadKusochki(model, render_model, material_mode)) {
|
||||
if (!uploadKusochki(model, render_model)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (!uploadKusochkiSubset(model, render_model, material_mode, render_model->geometries_changed, render_model->geometries_changed_count))
|
||||
if (!uploadKusochkiSubset(model, render_model, render_model->geometries_changed, render_model->geometries_changed_count))
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -170,18 +170,6 @@ typedef struct {
|
|||
|
||||
static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* args) {
|
||||
const VkCommandBuffer cmdbuf = combuf->cmdbuf;
|
||||
// TODO move this to "TLAS producer"
|
||||
g_rtx.res[ExternalResource_tlas].resource = (vk_resource_t){
|
||||
.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
|
||||
.value = (vk_descriptor_value_t){
|
||||
.accel = (VkWriteDescriptorSetAccelerationStructureKHR) {
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
|
||||
.accelerationStructureCount = 1,
|
||||
.pAccelerationStructures = &g_accel.tlas,
|
||||
.pNext = NULL,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#define RES_SET_BUFFER(name, type_, source_, offset_, size_) \
|
||||
g_rtx.res[ExternalResource_##name].resource = (vk_resource_t){ \
|
||||
|
@ -278,24 +266,11 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a
|
|||
}
|
||||
|
||||
DEBUG_BEGIN(cmdbuf, "yay tracing");
|
||||
RT_VkAccelPrepareTlas(combuf);
|
||||
prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);
|
||||
|
||||
// 4. Barrier for TLAS build
|
||||
{
|
||||
const VkBufferMemoryBarrier bmb[] = { {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
|
||||
.buffer = g_accel.accels_buffer.buffer,
|
||||
.offset = 0,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
} };
|
||||
vkCmdPipelineBarrier(cmdbuf,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
|
||||
}
|
||||
// TODO move this to "TLAS producer"
|
||||
g_rtx.res[ExternalResource_tlas].resource = RT_VkAccelPrepareTlas(combuf);
|
||||
|
||||
prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);
|
||||
|
||||
{ // FIXME this should be done automatically inside meatpipe, TODO
|
||||
//const uint32_t size = sizeof(struct Lights);
|
||||
|
|
|
@ -52,3 +52,17 @@ void VK_RayMapLoadEnd( void );
|
|||
qboolean VK_RayInit( void );
|
||||
void VK_RayShutdown( void );
|
||||
|
||||
typedef struct rt_model_s {
|
||||
const struct rt_blas_s *blas;
|
||||
uint32_t kusochki_offset;
|
||||
} rt_model_t;
|
||||
|
||||
struct vk_render_geometry_s;
|
||||
void RT_ModelUploadKusochki(rt_model_t *model, const struct vk_render_geometry_s *geoms[], int geoms_count);
|
||||
|
||||
// Update animated materials
|
||||
struct vk_render_geometry_s;
|
||||
void RT_ModelUpdateMaterialsSubset(rt_model_t *model, const struct vk_render_geometry_s *geoms[], const int *geoms_indices, int geoms_indices_count);
|
||||
|
||||
// Clone materials with different base_color texture (sprites)
|
||||
void RT_ModelOverrideMaterial(struct rt_blas_s *blas, int texture);
|
||||
|
|
Loading…
Reference in New Issue