rt: prepare for blas mgmt refactoring

1. Rename models passed to TLAS to instances.
2. Remove BLAS validation: old, doesn't make sense anymore.
3. Draft general blas mgmt approach in NOTES.md
Ivan Avdeev 2023-05-17 10:42:18 -07:00
parent 7d6c12218f
commit 14a648d16c
5 changed files with 140 additions and 125 deletions

View File

@@ -69,3 +69,95 @@ However, there are several staging cmdbuf usages which are technically out-of-ba
TriGetMatrix,
TriFogParams,
TriCullFace,
# Better BLAS management API
~~
BLAS:
- geom_count => kusok.geom/material.size() == geom_count
Model types:
1. Fully static (brush model w/o animated textures; studio model w/o animations): singleton, fixed geoms and materials, uploaded only once
2. Semi-static (brush model w/ animated textures): singleton, fixed geoms, may update materials in place (e.g. animated textures)
3. Dynamic (beams, triapi, etc): singleton, may update both geoms and materials in place
4. Template (sprites): used by multiple instances/copies; fixed geom, multiple material variants (colors, textures, etc.)
5. Update-from template (studio models): used by multiple dynamic models deriving from it via BLAS UPDATE; dynamic geom+locations, fixed-ish materials.
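As a sketch, these five types could map onto an enum (hypothetical names; nothing like this exists in the code yet):
// Hypothetical classification mirroring the five model types above.
typedef enum {
	kRtModelStatic,       // 1. brush w/o animated textures, studio w/o animations
	kRtModelSemiStatic,   // 2. brush w/ animated textures: materials update in place
	kRtModelDynamic,      // 3. beams, triapi: geoms and materials update in place
	kRtModelTemplate,     // 4. sprites: fixed geom shared by many instances/copies
	kRtModelUpdateFrom,   // 5. studio models: per-entity copies derived via BLAS UPDATE
} rt_model_type_e;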
API ~
1. RT_ModelCreate(geometries_count dynamic?static?) -> rt_model + preallocated mem
2. RT_ModelBuild/Update(geometries[]) -> (blas + kusok.geom[])
3. RT_ModelUpdateMaterials(model, geometries/textures/materials[]); -> (kusok.material[])
4. RT_FrameAddModel(model + kusok.geom[] + kusok.material[] + render_type + xform + color)
struct rt_model_s;

typedef struct {
	const struct rt_model_s* model;
	vk_render_type_e render_type;
	matrix3x4 transform, prev_transform;
	vec4_t color;
} rt_frame_add_model_args_t;

void RT_FrameAddModel( rt_frame_add_model_args_t args );
~~
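A minimal sketch of how a caller might drive the draft API above. The RT_ModelCreate argument layout is not settled ("geometries_count dynamic?static?"), so the two-argument form, the load/draw helpers, and the geometry arrays here are illustrative only:
// Sketch only; assumes the draft signatures above, none of which are final.
static struct rt_model_s *g_model;

static void modelLoad( const vk_render_geometry_t *geoms, int geoms_count ) {
	// 1. Create: preallocates BLAS + kusochki storage for geoms_count geometries
	g_model = RT_ModelCreate( geoms_count, /*dynamic=*/ false );

	// 2. Build once: fills blas + kusok.geom[]
	RT_ModelBuild( g_model, geoms );
}

static void modelDraw( void ) {
	// 3. (semi-static only) patch materials in place, e.g. animated textures -> kusok.material[]
	//RT_ModelUpdateMaterials( g_model, updated_geoms );

	// 4. Register one instance of this model for the current frame's TLAS
	RT_FrameAddModel( (rt_frame_add_model_args_t){
		.model = g_model,
		.render_type = kVkRenderTypeSolid,
		// .transform, .prev_transform, .color filled in by the caller
	} );
}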
rt_instance_t/rt_blas_t:
- VkAS blas
- VkASGeometry geom[] -> (vertex+index buffer address)
- VkASBuildRangeInfo ranges[] -> (vtxidx buffer offsets)
- ~~TODO: updateable: blas[2]? Ping-pong update, cannot do in place?~~ Nope, can do in place.
- kusochki
- kusok[]
- geometry -> (vtxidx buffer offsets)
- TODO roughly the same data as VkASBuildRangeInfo, can reuse?
- material (currently embedded in kusok)
- static: tex[], scalar[]
- semi-dynamic:
- (a few) animated tex_base_color
- emissive
- animated with tex_base_color
- individual per-surface patches
- TODO: extract as a different modality not congruent with kusok data
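Put together, the BLAS-side data described above could look roughly like this (a sketch; everything except the Vulkan types is a made-up name):
typedef struct rt_blas_s {
	VkAccelerationStructureKHR blas;

	// Build inputs: vertex+index buffer addresses and offsets per geometry.
	// ranges[] carries roughly the same offsets the kusochki need (see TODO above).
	VkAccelerationStructureGeometryKHR *geoms;
	VkAccelerationStructureBuildRangeInfoKHR *ranges;
	int geoms_count;

	// Shader-visible side: geometry offsets + material, currently embedded
	// together in struct Kusok, starting at this offset in the kusochki buffer.
	uint32_t kusochki_offset;
} rt_blas_t;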
Usage cases for the above:
1. (Fully+semi) static.
- Accept geom[] from above with vtx+idx references. Consider them static.
- Allocate static/fixed blas + kusok data once at map load.
- Allocate geom+ranges[] temporarily. Fill them with vtx+idx refs.
- Build BLAS (?: how does this work with lazy/deferred BLAS building wrt geom+ranges allocation)
- Similar to staging: collect everything + temp data, then commit.
- Needs BLAS manager, similar to vk_staging
- Generate Kusok data with current geoms and materials
- Free geom+ranges
- Each frame:
- (semi-static only) Update kusochki materials for animated textures
- Add blas+kusochki_offset (+dynamic color/xform/mmode) to TLAS
2. Preallocated dynamic (triapi)
- Preallocate for fixed N geoms:
- geom+ranges[N].
- BLAS for N geometries
- kusochki[N]
- Each frame:
- Fill geom+ranges with geom data fed from outside
- Fill kusochki --//--
- Fast-Build BLAS as new
- Add to TLAS
3. Dynamic with update (animated studio models, beams)
- When a new studio model entity is encountered:
- Allocate:
- AT FIXED OFFSET: vtx+idx block
- geom+ranges[N], BLAS for N, kusochki[N]
- Each frame:
- Fill geom+ranges with geom data
- Fill kusochki --//--
- First frame: BLAS as new
- Next frames: UPDATE BLAS in-place (depends on fixed offsets for vtx+idx)
- Add to TLAS
4. Instanced (sprites, studio models w/o animations).
- Same as static, BUT potentially dynamic and different materials. I.e. have to have per-instance kusochki copies with slightly different material contents.
- I.e. each frame:
- If modifying materials (e.g. different texture for sprites):
- allocate temporary (for this frame only) kusochki block
- fill geom+material kusochki data
- Add to TLAS w/ correct kusochki offset.
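The per-frame difference between cases 2 and 3 above comes down to the build mode; a sketch (all names hypothetical except the Vulkan enum values):
static void blasCommitFrame( rt_blas_t *blas, qboolean first_frame ) {
	fillGeomsAndRanges( blas );  // hypothetical: geom+ranges[] fed from outside
	fillKusochki( blas );        // hypothetical: same offsets + materials for shaders

	if (blas->updateable && !first_frame) {
		// Case 3: update in place; relies on vtx+idx blocks at fixed offsets
		blasBuild( blas, VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR );
	} else {
		// Case 2 (and the first frame of case 3): fast-build as new
		blasBuild( blas, VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR );
	}

	addToTlas( blas );  // with the correct kusochki offset
}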

View File

@@ -17,7 +17,7 @@ struct rt_vk_ray_accel_s g_accel = {0};
static struct {
struct {
int blas_count;
int instances_count;
int accels_built;
} stats;
} g_accel_;
@@ -150,9 +150,9 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
},
},
};
const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_ACCELS }; //cmdbuf == VK_NULL_HANDLE ? MAX_ACCELS : g_ray_model_state.frame.num_models };
const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_INSTANCES }; //cmdbuf == VK_NULL_HANDLE ? MAX_ACCELS : g_ray_model_state.frame.instances_count };
const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
.primitiveCount = g_ray_model_state.frame.num_models,
.primitiveCount = g_ray_model_state.frame.instances_count,
};
const as_build_args_t asrgs = {
.geoms = tl_geom,
@@ -172,12 +172,12 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
}
void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
ASSERT(g_ray_model_state.frame.num_models > 0);
ASSERT(g_ray_model_state.frame.instances_count > 0);
DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas");
R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc );
const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.num_models, 1);
const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.instances_count, 1);
ASSERT(instance_offset != ALO_ALLOC_FAILED);
// Upload all blas instances references to GPU mem
@@ -185,23 +185,23 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
const vk_staging_region_t headers_lock = R_VkStagingLockForBuffer((vk_staging_buffer_args_t){
.buffer = g_ray_model_state.model_headers_buffer.buffer,
.offset = 0,
.size = g_ray_model_state.frame.num_models * sizeof(struct ModelHeader),
.size = g_ray_model_state.frame.instances_count * sizeof(struct ModelHeader),
.alignment = 16,
});
ASSERT(headers_lock.ptr);
VkAccelerationStructureInstanceKHR* inst = ((VkAccelerationStructureInstanceKHR*)g_accel.tlas_geom_buffer.mapped) + instance_offset;
for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) {
const vk_ray_draw_model_t* const model = g_ray_model_state.frame.models + i;
ASSERT(model->model);
ASSERT(model->model->as != VK_NULL_HANDLE);
for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) {
const rt_draw_instance_t* const instance = g_ray_model_state.frame.instances + i;
ASSERT(instance->model);
ASSERT(instance->model->as != VK_NULL_HANDLE);
inst[i] = (VkAccelerationStructureInstanceKHR){
.instanceCustomIndex = model->model->kusochki_offset,
.instanceCustomIndex = instance->model->kusochki_offset,
.instanceShaderBindingTableRecordOffset = 0,
.accelerationStructureReference = getASAddress(model->model->as), // TODO cache this addr
.accelerationStructureReference = getASAddress(instance->model->as), // TODO cache this addr
};
switch (model->material_mode) {
switch (instance->material_mode) {
case MATERIAL_MODE_OPAQUE:
inst[i].mask = GEOMETRY_BIT_OPAQUE;
inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_REGULAR,
@@ -225,21 +225,21 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR;
break;
default:
gEngine.Host_Error("Unexpected material mode %d\n", model->material_mode);
gEngine.Host_Error("Unexpected material mode %d\n", instance->material_mode);
break;
}
memcpy(&inst[i].transform, model->transform_row, sizeof(VkTransformMatrixKHR));
memcpy(&inst[i].transform, instance->transform_row, sizeof(VkTransformMatrixKHR));
struct ModelHeader *const header = ((struct ModelHeader*)headers_lock.ptr) + i;
header->mode = model->material_mode;
Vector4Copy(model->model->color, header->color);
Matrix4x4_ToArrayFloatGL(model->model->prev_transform, (float*)header->prev_transform);
header->mode = instance->material_mode;
Vector4Copy(instance->model->color, header->color);
Matrix4x4_ToArrayFloatGL(instance->model->prev_transform, (float*)header->prev_transform);
}
R_VkStagingUnlock(headers_lock.handle);
}
g_accel_.stats.blas_count = g_ray_model_state.frame.num_models;
g_accel_.stats.instances_count = g_ray_model_state.frame.instances_count;
// Barrier for building all BLASes
// BLAS building is now in cmdbuf, need to synchronize with results
@@ -250,7 +250,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR,
.buffer = g_accel.accels_buffer.buffer,
.offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR),
.size = g_ray_model_state.frame.num_models * sizeof(VkAccelerationStructureInstanceKHR),
.size = g_ray_model_state.frame.instances_count * sizeof(VkAccelerationStructureInstanceKHR),
}};
vkCmdPipelineBarrier(combuf->cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
@@ -282,7 +282,7 @@ qboolean RT_VkAccelInit(void) {
g_accel.scratch_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.scratch_buffer.buffer);
// TODO this doesn't really need to be host visible, use staging
if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_ACCELS * 2,
if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_INSTANCES * 2,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
@@ -290,9 +290,9 @@ qboolean RT_VkAccelInit(void) {
return false;
}
g_accel.tlas_geom_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.tlas_geom_buffer.buffer);
R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_ACCELS * 2);
R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_INSTANCES * 2);
R_SpeedsRegisterMetric(&g_accel_.stats.blas_count, "blas_count", kSpeedsMetricCount);
R_SpeedsRegisterMetric(&g_accel_.stats.instances_count, "accels_instances_count", kSpeedsMetricCount);
R_SpeedsRegisterMetric(&g_accel_.stats.accels_built, "accels_built", kSpeedsMetricCount);
return true;

View File

@@ -4,7 +4,7 @@
#include "vk_buffer.h"
#include "vk_const.h"
#define MAX_ACCELS 2048
#define MAX_INSTANCES 2048
#define MAX_KUSOCHKI 32768
#define MODEL_CACHE_SIZE 2048
@@ -32,11 +32,11 @@ typedef struct vk_ray_model_s {
typedef struct Kusok vk_kusok_data_t;
typedef struct {
typedef struct rt_draw_instance_s {
matrix3x4 transform_row;
vk_ray_model_t *model;
uint32_t material_mode; // MATERIAL_MODE_ from ray_interop.h
} vk_ray_draw_model_t;
} rt_draw_instance_t;
typedef struct {
const char *debug_name;
@@ -66,9 +66,9 @@ typedef struct {
// Per-frame data that is accumulated between RayFrameBegin and End calls
struct {
int num_models;
int num_lighttextures;
vk_ray_draw_model_t models[MAX_ACCELS];
rt_draw_instance_t instances[MAX_INSTANCES];
int instances_count;
uint32_t scratch_offset; // for building dynamic blases
} frame;

View File

@@ -84,77 +84,6 @@ static vk_ray_model_t *getModelFromCache(int num_geoms, int max_prims, const VkA
return model;
}
static void assertNoOverlap( uint32_t o1, uint32_t s1, uint32_t o2, uint32_t s2 ) {
uint32_t min_offset, min_size;
uint32_t max_offset;
if (o1 < o2) {
min_offset = o1;
min_size = s1;
max_offset = o2;
} else {
min_offset = o2;
min_size = s2;
max_offset = o1;
}
ASSERT(min_offset + min_size <= max_offset);
}
static void validateModelPair( const vk_ray_model_t *m1, const vk_ray_model_t *m2 ) {
if (m1 == m2) return;
if (!m2->num_geoms) return;
assertNoOverlap(m1->debug.as_offset, m1->size, m2->debug.as_offset, m2->size);
if (m1->taken && m2->taken)
assertNoOverlap(m1->kusochki_offset, m1->num_geoms, m2->kusochki_offset, m2->num_geoms);
}
static void validateModel( const vk_ray_model_t *model ) {
for (int j = 0; j < ARRAYSIZE(g_ray_model_state.models_cache); ++j) {
validateModelPair(model, g_ray_model_state.models_cache + j);
}
}
static void validateModels( void ) {
for (int i = 0; i < ARRAYSIZE(g_ray_model_state.models_cache); ++i) {
validateModel(g_ray_model_state.models_cache + i);
}
}
void XVK_RayModel_Validate( void ) {
const vk_kusok_data_t* kusochki = g_ray_model_state.kusochki_buffer.mapped;
ASSERT(g_ray_model_state.frame.num_models <= ARRAYSIZE(g_ray_model_state.frame.models));
for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) {
const vk_ray_draw_model_t *draw_model = g_ray_model_state.frame.models + i;
const vk_ray_model_t *model = draw_model->model;
int num_geoms = 1; // TODO can't validate non-dynamic models because this info is lost
ASSERT(model);
ASSERT(model->as != VK_NULL_HANDLE);
ASSERT(model->kusochki_offset < MAX_KUSOCHKI);
ASSERT(model->geoms);
ASSERT(model->num_geoms > 0);
ASSERT(model->taken);
num_geoms = model->num_geoms;
for (int j = 0; j < num_geoms; j++) {
const vk_kusok_data_t *kusok = kusochki + j;
const vk_texture_t *tex = findTexture(kusok->material.tex_base_color);
ASSERT(tex);
ASSERT(tex->vk.image.view != VK_NULL_HANDLE);
// uint32_t index_offset;
// uint32_t vertex_offset;
// uint32_t triangles;
}
// Check for as model memory aliasing
for (int j = 0; j < g_ray_model_state.frame.num_models; ++j) {
const vk_ray_model_t *model2 = g_ray_model_state.frame.models[j].model;
validateModelPair(model, model2);
}
}
}
static void applyMaterialToKusok(vk_kusok_data_t* kusok, const vk_render_geometry_t *geom) {
const xvk_material_t *const mat = XVK_GetMaterialForTextureIndex( geom->texture );
ASSERT(mat);
@@ -295,9 +224,6 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
ray_model->material_mode = -1;
Vector4Set(ray_model->color, 1, 1, 1, 1);
Matrix4x4_LoadIdentity(ray_model->prev_transform);
if (vk_core.debug)
validateModel(ray_model);
}
}
}
@@ -422,19 +348,21 @@ static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_rende
}
void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render_model) {
vk_ray_draw_model_t* draw_model = g_ray_model_state.frame.models + g_ray_model_state.frame.num_models;
rt_draw_instance_t* draw_instance = g_ray_model_state.frame.instances + g_ray_model_state.frame.instances_count;
ASSERT(vk_core.rtx);
ASSERT(g_ray_model_state.frame.num_models <= ARRAYSIZE(g_ray_model_state.frame.models));
ASSERT(g_ray_model_state.frame.instances_count <= ARRAYSIZE(g_ray_model_state.frame.instances));
ASSERT(model->num_geoms == render_model->num_geometries);
if (g_ray_model_state.frame.num_models == ARRAYSIZE(g_ray_model_state.frame.models)) {
if (g_ray_model_state.frame.instances_count == ARRAYSIZE(g_ray_model_state.frame.instances)) {
gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n");
return;
}
ASSERT(model->as != VK_NULL_HANDLE);
// TODO this material mapping is context dependent. I.e. different entity types might need different ray tracing behaviours for
// same render_mode/type and even texture.
uint32_t material_mode = MATERIAL_MODE_OPAQUE;
switch (render_model->render_type) {
case kVkRenderTypeSolid:
@@ -461,17 +389,12 @@ void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render
}
// Re-upload kusochki if needed
// TODO all of this will not be required when model data is split out from Kusok struct
// TODO all of this can be removed. We just need to make sure that kusochki have been uploaded once (for static models).
#define Vector4Compare(v1,v2) ((v1)[0]==(v2)[0] && (v1)[1]==(v2)[1] && (v1)[2]==(v2)[2] && (v1)[3]==(v2)[3])
const qboolean upload_kusochki = (model->material_mode != material_mode
|| !Vector4Compare(model->color, render_model->color)
|| memcmp(model->prev_transform, render_model->prev_transform, sizeof(matrix4x4)) != 0);
// TODO optimize:
// - collect list of geoms for which we could update anything (animated textures, uvs, etc)
// - update only those through staging
// - also consider tracking whether the main model color has changed (that'd need to update everything yay)
if (upload_kusochki) {
model->material_mode = material_mode;
Vector4Copy(render_model->color, model->color);
@@ -491,11 +414,11 @@ void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render
RT_LightAddPolygon(polylight);
}
draw_model->model = model;
draw_model->material_mode = material_mode;
Matrix3x4_Copy(draw_model->transform_row, render_model->transform);
draw_instance->model = model;
draw_instance->material_mode = material_mode;
Matrix3x4_Copy(draw_instance->transform_row, render_model->transform);
g_ray_model_state.frame.num_models++;
g_ray_model_state.frame.instances_count++;
}
void RT_RayModel_Clear(void) {
@@ -507,18 +430,18 @@ void XVK_RayModel_ClearForNextFrame( void ) {
// currently framectl waits for the queue to complete before returning
// so we can be sure here that previous frame is complete and we're free to
// destroy/reuse dynamic ASes from previous frame
for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) {
vk_ray_draw_model_t *model = g_ray_model_state.frame.models + i;
ASSERT(model->model);
for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) {
rt_draw_instance_t *instance = g_ray_model_state.frame.instances + i;
ASSERT(instance->model);
if (!model->model->dynamic)
if (!instance->model->dynamic)
continue;
returnModelToCache(model->model);
model->model = NULL;
returnModelToCache(instance->model);
instance->model = NULL;
}
g_ray_model_state.frame.num_models = 0;
g_ray_model_state.frame.instances_count = 0;
// TODO N frames in flight
// HACK: blas caching requires persistent memory

View File

@@ -563,7 +563,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
ASSERT(g_rtx.mainpipe_out);
if (g_ray_model_state.frame.num_models == 0) {
if (g_ray_model_state.frame.instances_count == 0) {
const r_vkimage_blit_args blit_args = {
.in_stage = VK_PIPELINE_STAGE_TRANSFER_BIT,
.src = {
@@ -641,7 +641,7 @@ qboolean VK_RayInit( void )
return false;
}
if (!VK_BufferCreate("model headers", &g_ray_model_state.model_headers_buffer, sizeof(struct ModelHeader) * MAX_ACCELS,
if (!VK_BufferCreate("model headers", &g_ray_model_state.model_headers_buffer, sizeof(struct ModelHeader) * MAX_INSTANCES,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
// FIXME complain, handle