diff --git a/ref/vk/NOTES.md b/ref/vk/NOTES.md index ed768748..2337d626 100644 --- a/ref/vk/NOTES.md +++ b/ref/vk/NOTES.md @@ -69,3 +69,95 @@ However, there are several staging cmdbuf usages which are technically out-of-ba TriGetMatrix, TriFogParams, TriCullFace, + + +# Better BLAS management API + +~~ +BLAS: +- geom_count => kusok.geom/material.size() == geom_count + +Model types: +1. Fully static (brush model w/o animated textures; studio model w/o animations): singleton, fixed geoms and materials, uploaded only once +2. Semi-static (brush model w/ animated textures): singleton, fixed geoms, may update materials, inplace (e.g. animated textures) +3. Dynamic (beams, triapi, etc): singleton, may update both geoms and materials, inplace +4. Template (sprites): used by multiple instances, fixed geom, multiple materials (colors, textures etc) instances/copies +5. Update-from template (studio models): used by multiple dynamic models, deriving from it via BLAS UPDATE, dynamic geom+locations, fixed-ish materials. + +API ~ +1. RT_ModelCreate(geometries_count dynamic?static?) -> rt_model + preallocated mem +2. RT_ModelBuild/Update(geometries[]) -> (blas + kusok.geom[]) +3. RT_ModelUpdateMaterials(model, geometries/textures/materials[]); -> (kusok.material[]) +4. RT_FrameAddModel(model + kusok.geom[] + kusok.material[] + render_type + xform + color) + +struct rt_model_s; +typedef struct { + const struct rt_model_s* model; + vk_render_type_e render_type; + matrix3x4 transform, prev_transform; + vec4_t color; +} rt_frame_add_model_args_t; +void RT_FrameAddModel( rt_frame_add_model_args_t args ); +~~ + + +rt_instance_t/rt_blas_t: +- VkAS blas + - VkASGeometry geom[] -> (vertex+index buffer address) + - VkASBuildRangeInfo ranges[] -> (vtxidx buffer offsets) + - ~~TODO: updateable: blas[2]? Ping-pong update, cannot do inplace?~~ Nope, can do inplace. +- kusochki + - kusok[] + - geometry -> (vtxidx buffer offsets) + - TODO roughly the same data as VkASBuildRangeInfo, can reuse? 
+ - material (currently embedded in kusok) + - static: tex[], scalar[] + - semi-dynamic: + - (a few) animated tex_base_color + - emissive + - animated with tex_base_color + - individual per-surface patches + - TODO: extract as a different modality not congruent with kusok data + +Usage cases for the above: +1. (Fully+semi) static. + - Accept geom[] from above with vtx+idx references. Consider them static. + - Allocate static/fixed blas + kusok data once at map load. + - Allocate geom+ranges[] temporarily. Fill them with vtx+idx refs. + - Build BLAS (?: how does this work with lazy/deferred BLAS building wrt geom+ranges allocation) + - Similar to staging: collect everything + temp data, then commit. + - Needs BLAS manager, similar to vk_staging + - Generate Kusok data with current geoms and materials + - Free geom+ranges + - Each frame: + - (semi-static only) Update kusochki materials for animated textures + - Add blas+kusochki_offset (+dynamic color/xform/mmode) to TLAS +2. Preallocated dynamic (triapi) + - Preallocate for fixed N geoms: + - geom+ranges[N]. + - BLAS for N geometries + - kusochki[N] + - Each frame: + - Fill geom+ranges with geom data fed from outside + - Fill kusochki --//-- + - Fast-Build BLAS as new + - Add to TLAS +3. Dynamic with update (animated studio models, beams) + - When a new studio model entity is encountered: + - Allocate: + - AT FIXED OFFSET: vtx+idx block + - geom+ranges[N], BLAS for N, kusochki[N] + - Each frame: + - Fill geom+ranges with geom data + - Fill kusochki --//-- + - First frame: BLAS as new + - Next frames: UPDATE BLAS in-place (depends on fixed offsets for vtx+idx) + - Add to TLAS +4. Instanced (sprites, studio models w/o animations). + - Same as static, BUT potentially dynamic and different materials. I.e. have to have per-instance kusochki copies with slightly different material contents. + - I.e. each frame + - If modifying materials (e.g. 
different texture for sprites): + - allocate temporary (for this frame only) kusochki block + - fill geom+material kusochki data + - Add to TLAS w/ correct kusochki offset. + diff --git a/ref/vk/vk_ray_accel.c b/ref/vk/vk_ray_accel.c index c9d0ad7d..f144d69f 100644 --- a/ref/vk/vk_ray_accel.c +++ b/ref/vk/vk_ray_accel.c @@ -17,7 +17,7 @@ struct rt_vk_ray_accel_s g_accel = {0}; static struct { struct { - int blas_count; + int instances_count; int accels_built; } stats; } g_accel_; @@ -150,9 +150,9 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) { }, }, }; - const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_ACCELS }; //cmdbuf == VK_NULL_HANDLE ? MAX_ACCELS : g_ray_model_state.frame.num_models }; + const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_INSTANCES }; //cmdbuf == VK_NULL_HANDLE ? MAX_ACCELS : g_ray_model_state.frame.instances_count }; const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = { - .primitiveCount = g_ray_model_state.frame.num_models, + .primitiveCount = g_ray_model_state.frame.instances_count, }; const as_build_args_t asrgs = { .geoms = tl_geom, @@ -172,12 +172,12 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) { } void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { - ASSERT(g_ray_model_state.frame.num_models > 0); + ASSERT(g_ray_model_state.frame.instances_count > 0); DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas"); R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc ); - const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.num_models, 1); + const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.instances_count, 1); ASSERT(instance_offset != ALO_ALLOC_FAILED); // Upload all blas instances references to GPU mem @@ -185,23 +185,23 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { const vk_staging_region_t headers_lock = 
R_VkStagingLockForBuffer((vk_staging_buffer_args_t){ .buffer = g_ray_model_state.model_headers_buffer.buffer, .offset = 0, - .size = g_ray_model_state.frame.num_models * sizeof(struct ModelHeader), + .size = g_ray_model_state.frame.instances_count * sizeof(struct ModelHeader), .alignment = 16, }); ASSERT(headers_lock.ptr); VkAccelerationStructureInstanceKHR* inst = ((VkAccelerationStructureInstanceKHR*)g_accel.tlas_geom_buffer.mapped) + instance_offset; - for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) { - const vk_ray_draw_model_t* const model = g_ray_model_state.frame.models + i; - ASSERT(model->model); - ASSERT(model->model->as != VK_NULL_HANDLE); + for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) { + const rt_draw_instance_t* const instance = g_ray_model_state.frame.instances + i; + ASSERT(instance->model); + ASSERT(instance->model->as != VK_NULL_HANDLE); inst[i] = (VkAccelerationStructureInstanceKHR){ - .instanceCustomIndex = model->model->kusochki_offset, + .instanceCustomIndex = instance->model->kusochki_offset, .instanceShaderBindingTableRecordOffset = 0, - .accelerationStructureReference = getASAddress(model->model->as), // TODO cache this addr + .accelerationStructureReference = getASAddress(instance->model->as), // TODO cache this addr }; - switch (model->material_mode) { + switch (instance->material_mode) { case MATERIAL_MODE_OPAQUE: inst[i].mask = GEOMETRY_BIT_OPAQUE; inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_REGULAR, @@ -225,21 +225,21 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR; break; default: - gEngine.Host_Error("Unexpected material mode %d\n", model->material_mode); + gEngine.Host_Error("Unexpected material mode %d\n", instance->material_mode); break; } - memcpy(&inst[i].transform, model->transform_row, sizeof(VkTransformMatrixKHR)); + memcpy(&inst[i].transform, instance->transform_row, sizeof(VkTransformMatrixKHR)); struct 
ModelHeader *const header = ((struct ModelHeader*)headers_lock.ptr) + i; - header->mode = model->material_mode; - Vector4Copy(model->model->color, header->color); - Matrix4x4_ToArrayFloatGL(model->model->prev_transform, (float*)header->prev_transform); + header->mode = instance->material_mode; + Vector4Copy(instance->model->color, header->color); + Matrix4x4_ToArrayFloatGL(instance->model->prev_transform, (float*)header->prev_transform); } R_VkStagingUnlock(headers_lock.handle); } - g_accel_.stats.blas_count = g_ray_model_state.frame.num_models; + g_accel_.stats.instances_count = g_ray_model_state.frame.instances_count; // Barrier for building all BLASes // BLAS building is now in cmdbuf, need to synchronize with results @@ -250,7 +250,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, .buffer = g_accel.accels_buffer.buffer, .offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR), - .size = g_ray_model_state.frame.num_models * sizeof(VkAccelerationStructureInstanceKHR), + .size = g_ray_model_state.frame.instances_count * sizeof(VkAccelerationStructureInstanceKHR), }}; vkCmdPipelineBarrier(combuf->cmdbuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, @@ -282,7 +282,7 @@ qboolean RT_VkAccelInit(void) { g_accel.scratch_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.scratch_buffer.buffer); // TODO this doesn't really need to be host visible, use staging - if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_ACCELS * 2, + if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_INSTANCES * 2, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { @@ -290,9 +290,9 @@ qboolean RT_VkAccelInit(void) { return false; } g_accel.tlas_geom_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.tlas_geom_buffer.buffer); - R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_ACCELS * 2); + R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_INSTANCES * 2); - R_SpeedsRegisterMetric(&g_accel_.stats.blas_count, "blas_count", kSpeedsMetricCount); + R_SpeedsRegisterMetric(&g_accel_.stats.instances_count, "accels_instances_count", kSpeedsMetricCount); R_SpeedsRegisterMetric(&g_accel_.stats.accels_built, "accels_built", kSpeedsMetricCount); return true; diff --git a/ref/vk/vk_ray_internal.h b/ref/vk/vk_ray_internal.h index f1054bad..8fd09343 100644 --- a/ref/vk/vk_ray_internal.h +++ b/ref/vk/vk_ray_internal.h @@ -4,7 +4,7 @@ #include "vk_buffer.h" #include "vk_const.h" -#define MAX_ACCELS 2048 +#define MAX_INSTANCES 2048 #define MAX_KUSOCHKI 32768 #define MODEL_CACHE_SIZE 2048 @@ -32,11 +32,11 @@ typedef struct vk_ray_model_s { typedef struct Kusok vk_kusok_data_t; -typedef struct { +typedef struct rt_draw_instance_s { matrix3x4 transform_row; vk_ray_model_t *model; uint32_t material_mode; // MATERIAL_MODE_ from ray_interop.h -} vk_ray_draw_model_t; +} rt_draw_instance_t; typedef struct { const char *debug_name; @@ -66,9 +66,9 @@ typedef struct { // Per-frame data that is accumulated between RayFrameBegin and End calls struct { - int num_models; - int num_lighttextures; - vk_ray_draw_model_t models[MAX_ACCELS]; + rt_draw_instance_t instances[MAX_INSTANCES]; + int instances_count; + uint32_t scratch_offset; // for building dynamic blases } frame; diff --git a/ref/vk/vk_ray_model.c b/ref/vk/vk_ray_model.c index c34361bd..0645600f 100644 --- a/ref/vk/vk_ray_model.c +++ b/ref/vk/vk_ray_model.c @@ -84,77 +84,6 @@ static vk_ray_model_t *getModelFromCache(int num_geoms, int max_prims, const VkA return model; } -static void assertNoOverlap( uint32_t o1, uint32_t s1, uint32_t o2, 
uint32_t s2 ) { - uint32_t min_offset, min_size; - uint32_t max_offset; - - if (o1 < o2) { - min_offset = o1; - min_size = s1; - max_offset = o2; - } else { - min_offset = o2; - min_size = s2; - max_offset = o1; - } - - ASSERT(min_offset + min_size <= max_offset); -} - -static void validateModelPair( const vk_ray_model_t *m1, const vk_ray_model_t *m2 ) { - if (m1 == m2) return; - if (!m2->num_geoms) return; - assertNoOverlap(m1->debug.as_offset, m1->size, m2->debug.as_offset, m2->size); - if (m1->taken && m2->taken) - assertNoOverlap(m1->kusochki_offset, m1->num_geoms, m2->kusochki_offset, m2->num_geoms); -} - -static void validateModel( const vk_ray_model_t *model ) { - for (int j = 0; j < ARRAYSIZE(g_ray_model_state.models_cache); ++j) { - validateModelPair(model, g_ray_model_state.models_cache + j); - } -} - -static void validateModels( void ) { - for (int i = 0; i < ARRAYSIZE(g_ray_model_state.models_cache); ++i) { - validateModel(g_ray_model_state.models_cache + i); - } -} - -void XVK_RayModel_Validate( void ) { - const vk_kusok_data_t* kusochki = g_ray_model_state.kusochki_buffer.mapped; - ASSERT(g_ray_model_state.frame.num_models <= ARRAYSIZE(g_ray_model_state.frame.models)); - for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) { - const vk_ray_draw_model_t *draw_model = g_ray_model_state.frame.models + i; - const vk_ray_model_t *model = draw_model->model; - int num_geoms = 1; // TODO can't validate non-dynamic models because this info is lost - ASSERT(model); - ASSERT(model->as != VK_NULL_HANDLE); - ASSERT(model->kusochki_offset < MAX_KUSOCHKI); - ASSERT(model->geoms); - ASSERT(model->num_geoms > 0); - ASSERT(model->taken); - num_geoms = model->num_geoms; - - for (int j = 0; j < num_geoms; j++) { - const vk_kusok_data_t *kusok = kusochki + j; - const vk_texture_t *tex = findTexture(kusok->material.tex_base_color); - ASSERT(tex); - ASSERT(tex->vk.image.view != VK_NULL_HANDLE); - - // uint32_t index_offset; - // uint32_t vertex_offset; - // uint32_t 
triangles; - } - - // Check for as model memory aliasing - for (int j = 0; j < g_ray_model_state.frame.num_models; ++j) { - const vk_ray_model_t *model2 = g_ray_model_state.frame.models[j].model; - validateModelPair(model, model2); - } - } -} - static void applyMaterialToKusok(vk_kusok_data_t* kusok, const vk_render_geometry_t *geom) { const xvk_material_t *const mat = XVK_GetMaterialForTextureIndex( geom->texture ); ASSERT(mat); @@ -295,9 +224,6 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) { ray_model->material_mode = -1; Vector4Set(ray_model->color, 1, 1, 1, 1); Matrix4x4_LoadIdentity(ray_model->prev_transform); - - if (vk_core.debug) - validateModel(ray_model); } } } @@ -422,19 +348,21 @@ static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_rende } void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render_model) { - vk_ray_draw_model_t* draw_model = g_ray_model_state.frame.models + g_ray_model_state.frame.num_models; + rt_draw_instance_t* draw_instance = g_ray_model_state.frame.instances + g_ray_model_state.frame.instances_count; ASSERT(vk_core.rtx); - ASSERT(g_ray_model_state.frame.num_models <= ARRAYSIZE(g_ray_model_state.frame.models)); + ASSERT(g_ray_model_state.frame.instances_count <= ARRAYSIZE(g_ray_model_state.frame.instances)); ASSERT(model->num_geoms == render_model->num_geometries); - if (g_ray_model_state.frame.num_models == ARRAYSIZE(g_ray_model_state.frame.models)) { + if (g_ray_model_state.frame.instances_count == ARRAYSIZE(g_ray_model_state.frame.instances)) { gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n"); return; } ASSERT(model->as != VK_NULL_HANDLE); + // TODO this material mapping is context dependent. I.e. different entity types might need different ray tracing behaviours for + // same render_mode/type and even texture. 
uint32_t material_mode = MATERIAL_MODE_OPAQUE; switch (render_model->render_type) { case kVkRenderTypeSolid: @@ -461,17 +389,12 @@ void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render } // Re-upload kusochki if needed - // TODO all of this will not be required when model data is split out from Kusok struct + // TODO all of this can be removed. We just need to make sure that kusochki have been uploaded once (for static models). #define Vector4Compare(v1,v2) ((v1)[0]==(v2)[0] && (v1)[1]==(v2)[1] && (v1)[2]==(v2)[2] && (v1)[3]==(v2)[3]) const qboolean upload_kusochki = (model->material_mode != material_mode || !Vector4Compare(model->color, render_model->color) || memcmp(model->prev_transform, render_model->prev_transform, sizeof(matrix4x4)) != 0); -// TODO optimize: -// - collect list of geoms for which we could update anything (animated textues, uvs, etc) -// - update only those through staging -// - also consider tracking whether the main model color has changed (that'd need to update everything yay) - if (upload_kusochki) { model->material_mode = material_mode; Vector4Copy(render_model->color, model->color); @@ -491,11 +414,11 @@ void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render RT_LightAddPolygon(polylight); } - draw_model->model = model; - draw_model->material_mode = material_mode; - Matrix3x4_Copy(draw_model->transform_row, render_model->transform); + draw_instance->model = model; + draw_instance->material_mode = material_mode; + Matrix3x4_Copy(draw_instance->transform_row, render_model->transform); - g_ray_model_state.frame.num_models++; + g_ray_model_state.frame.instances_count++; } void RT_RayModel_Clear(void) { @@ -507,18 +430,18 @@ void XVK_RayModel_ClearForNextFrame( void ) { // currently framectl waits for the queue to complete before returning // so we can be sure here that previous frame is complete and we're free to // destroy/reuse dynamic ASes from previous frame - for (int i = 0; i < 
g_ray_model_state.frame.num_models; ++i) { - vk_ray_draw_model_t *model = g_ray_model_state.frame.models + i; - ASSERT(model->model); + for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) { + rt_draw_instance_t *instance = g_ray_model_state.frame.instances + i; + ASSERT(instance->model); - if (!model->model->dynamic) + if (!instance->model->dynamic) continue; - returnModelToCache(model->model); - model->model = NULL; + returnModelToCache(instance->model); + instance->model = NULL; } - g_ray_model_state.frame.num_models = 0; + g_ray_model_state.frame.instances_count = 0; // TODO N frames in flight // HACK: blas caching requires persistent memory diff --git a/ref/vk/vk_rtx.c b/ref/vk/vk_rtx.c index fafdd40a..49aa4388 100644 --- a/ref/vk/vk_rtx.c +++ b/ref/vk/vk_rtx.c @@ -563,7 +563,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) ASSERT(g_rtx.mainpipe_out); - if (g_ray_model_state.frame.num_models == 0) { + if (g_ray_model_state.frame.instances_count == 0) { const r_vkimage_blit_args blit_args = { .in_stage = VK_PIPELINE_STAGE_TRANSFER_BIT, .src = { @@ -641,7 +641,7 @@ qboolean VK_RayInit( void ) return false; } - if (!VK_BufferCreate("model headers", &g_ray_model_state.model_headers_buffer, sizeof(struct ModelHeader) * MAX_ACCELS, + if (!VK_BufferCreate("model headers", &g_ray_model_state.model_headers_buffer, sizeof(struct ModelHeader) * MAX_INSTANCES, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { // FIXME complain, handle