diff --git a/ref/vk/NOTES.md b/ref/vk/NOTES.md index ed768748..e1029bef 100644 --- a/ref/vk/NOTES.md +++ b/ref/vk/NOTES.md @@ -69,3 +69,332 @@ However, there are several staging cmdbuf usages which are technically out-of-ba TriGetMatrix, TriFogParams, TriCullFace, + + +# Better BLAS management API + +~~ +BLAS: +- geom_count => kusok.geom/material.size() == geom_count + +Model types: +1. Fully static (brush model w/o animated textures; studio model w/o animations): singleton, fixed geoms and materials, uploaded only once +2. Semi-static (brush model w/ animated textures): singleton, fixed geoms, may update materials, inplace (e.g. animated textures) +3. Dynamic (beams, triapi, etc): singleton, may update both geoms and materials, inplace +4. Template (sprites): used by multiple instances, fixed geom, multiple materials (colors, textures etc) instances/copies +5. Update-from template (studio models): used by multiple dynamic models, deriving from it via BLAS UPDATE, dynamic geom+locations, fixed-ish materials. + +API ~ +1. RT_ModelCreate(geometries_count dynamic?static?) -> rt_model + preallocated mem +2. RT_ModelBuild/Update(geometries[]) -> (blas + kusok.geom[]) +3. RT_ModelUpdateMaterials(model, geometries/textures/materials[]); -> (kusok.material[]) +4. RT_FrameAddModel(model + kusok.geom[] + kusok.material[] + render_type + xform + color) +~~ + + +rt_instance_t/rt_blas_t: +- VkAS blas + - VkASGeometry geom[] -> (vertex+index buffer address) + - VkASBuildRangeInfo ranges[] -> (vtxidx buffer offsets) + - ~~TODO: updateable: blas[2]? Ping-pong update, cannot do inplace?~~ Nope, can do inplace. +- kusochki + - kusok[] + - geometry -> (vtxidx buffer offsets) + - TODO roughly the same data as VkASBuildRangeInfo, can reuse? 
+ - material (currently embedded in kusok) + - static: tex[], scalar[] + - semi-dynamic: + - (a few) animated tex_base_color + - emissive + - animated with tex_base_color + - individual per-surface patches + - TODO: extract as a different modality not congruent with kusok data + +Use cases for the above: +1. (Fully+semi) static. + - Accept geom[] from above with vtx+idx references. Consider them static. + - Allocate static/fixed blas + kusok data once at map load. + - Allocate geom+ranges[] temporarily. Fill them with vtx+idx refs. + - Build BLAS (?: how does this work with lazy/deferred BLAS building wrt geom+ranges allocation) + - Similar to staging: collect everything + temp data, then commit. + - Needs BLAS manager, similar to vk_staging + - Generate Kusok data with current geoms and materials + - Free geom+ranges + - Each frame: + - (semi-static only) Update kusochki materials for animated textures + - Add blas+kusochki_offset (+dynamic color/xform/mmode) to TLAS +2. Preallocated dynamic (triapi) + - Preallocate for fixed N geoms: + - geom+ranges[N]. + - BLAS for N geometries + - kusochki[N] + - Each frame: + - Fill geom+ranges with geom data fed from outside + - Fill kusochki --//-- + - Fast-Build BLAS as new + - Add to TLAS +3. Dynamic with update (animated studio models, beams) + - When a new studio model entity is encountered: + - Allocate: + - AT FIXED OFFSET: vtx+idx block + - geom+ranges[N], BLAS for N, kusochki[N] + - Each frame: + - Fill geom+ranges with geom data + - Fill kusochki --//-- + - First frame: BLAS as new + - Next frames: UPDATE BLAS in-place (depends on fixed offsets for vtx+idx) + - Add to TLAS +4. Instanced (sprites, studio models w/o animations). + - Same as static, BUT potentially dynamic and different materials. I.e. have to have per-instance kusochki copies with slightly different material contents. + - I.e. each frame + - If modifying materials (e.g. 
different texture for sprites): + - allocate temporary (for this frame only) kusochki block + - fill geom+material kusochki data + - Add to TLAS w/ correct kusochki offset. + +Exposed ops: +- Create BLAS for N geoms +- Allocate kusochki[N] + - static (fixed pos) + - temporary (any location, single frame lifetime) +- Fill kusochki + - All geoms[] + - Subset of geoms[] (animated textures for static) +- Build BLAS + - Allocate geom+ranges[N] + - Single frame staging-like? + - Needed only for BLAS BUILD/UPDATE + - from geoms+ranges[N] + - build vs update +- Add to TLAS w/ color/xform/mmode/... + +- geometry_buffer -- vtx+idx static + multi-frame dynamic + single-frame dynamic +- kusochki_buffer -- kusok[] static + dynamic + clone_dynamic +- accel_buffer -- static + multiframe dynamic + single-frame dynamic +- scratch_buffer - single-frame dynamic +- model_buffer - single-frame dynamic + +# E268: explicit kusochki management +Kusochki buffer has lifetime rules similar to those of the geometry buffer +Funcs: +- Allocate kusochki[N] w/ static/long lifetime +- Allocate dynamic (single-frame) kusochki[N] +- Upload geom[N] -> kusochki[N] +- Upload subset geom[ind[M]] -> kusochki[M] + +# E269 + +RT model alloc: +- blas -- fixed + - accel buffer region -- fixed + - (scratch: once for build) + - (geoms: once for build) +- -> geometry buffer -- fixed +- kusochki[G]: geometry data -- fixed +- materials[G]: -- fixed + +RT model update: +- lives in the same statically allocated blas + accel_buffer +- + +RT model draw: +- mmode +- materials[G] -- can be fully static, partially dynamic, fully dynamic + - update inplace for most of dynamic things + - clone for instanced +- color +- transforms + +## Blocks +### Layer 0: abstract, not backing-dependent + handle = R_BlockAlloc(int size, lifetime); + - block possible users: {accel buffer, geometry, kusochki, materials}; + - lifetime + - long: (map, N frames: basically everything) + - once = this frame only: sprite materials, triapi 
geometry/kusochki/materials + - handle: offset, size + - R_BlockAcquire/Release(handle); + - R_BlocksClearOnce(); -- frees "once" regions, checking that they are not referenced + - R_BlocksClearFull(); -- clears everything, checking that there are no external references + +### Layer 1: backed by buffer +- lock = R_SmthLock(handle, size, offset) + - marks region/block as dirty (cannot be used by anything yet, prevents release, clear, etc.), + - opens staging region for filling and uploading +- R_SmthUnlock(lock) + - remembers dirty region (for barriers) + - submits into staging queue +- ?? R_SmthBarrier -- somehow ask for the barrier struct given pipelines, etc + +# E271 + +## Map loading sequence +1. For a bunch of sprites: + 1. Load their textures + 2. Mod_ProcessRenderData(spr, create=1) +2. "loading maps/c1a0.bsp" message + 1. Load a bunch of `#maps/c1a0.bsp:*.mip` textures + 2. Mod_ProcessRenderData(maps/c1a0.bsp, create=1) +3. For studio models: + 1. Load their textures + 2. Mod_ProcessRenderData(mdl, create=1) +4. "level loaded at 0.31 sec" message +5. 1-2 frames drawn (judging by vk swapchain logs) +6. Do another bunch of sprites (as #1) +7. Lightstyles logs +8. "Setting up renderer..." message +9. R_NewMap() is called + 1. (vk) load skybox + 2. (vk) extract WADs, parse entities + 3. (vk) parse materials + 4. (vk) parse patches + 5. (vk) load models + 1. load brush models + 2. skip studio and sprite models + 6. (vk) load lights: parse rad files, etc +10. "loading model/scientist02.mdl" +11. Load 640_pain.spr ???, Mod_ProcessRenderData() first, then textures ?? + +## Map unloading sequence +1. Mod_ProcessRenderData(maps/c1a0.bsp, create=0) + - NO similar calls for `*` brush submodels. +2. 
For the rest of studio and sprite models: + - Mod_ProcessRenderData(create=0) + +# E274 + +rt_model: + - kusok/geom + - index_,vertex_offset (static, same as geom/blas lifetime) + - ref to material (static or dynamic) + - emissive (mostly static, independent to material) + - instanceCustomIndex (24 bits) = offset to kusochki buffer + - kusochki[G] + - geom data (index, vertex offsets) + - emissive + - material + - materials[M] + - kusochki[N] <- iCI + + +# E275 studio models + +- `R_StudioDrawPoints()` + - `VK_RenderModelDynamicBegin()` + - compute `g_studio.verts` + - in: + - `m_pSubModel` + - `m_pStudioHeader` + - `g_studio.worldtransform` + - `R_StudioBuildNormalTable()` ... + - `R_StudioGenerateNormals()` + - in: + - `m_pStudioHeader` + - `m_pSubModel` + - `g_studio.verts` + - out: + - `g_studio.norms` + - `g_studio.tangents` + - for all submodel meshes + - compute normals+tangents + - for all submodel meshes + - `R_StudioDrawNormalMesh()` + - `R_GeometryBufferAllocOnceAndLock()` + - fills it with vertex/index data, reading `g_studio.verts/norms/tangents/...` + - `R_StudioSetColorBegin()` ??? + - `R_GeometryBufferUnlock()` + - `VK_RenderModelDynamicAddGeometry()` + - `VK_RenderModelDynamicCommit()` + +- `R_StudioDrawPoints()` callers: + - external ??? + - `R_StudioRenderFinal()` + +- `R_StudioRenderFinal()` + - ... 
TBD + - `VK_RenderDebugLabelBegin()` + - for all `m_pStudioHeader->numbodyparts` + - `R_StudioSetupModel()` -- also can be called externally + - set `m_pBodyPart` + - set `m_pSubModel` + - `R_StudioDrawPoints()` + - `GL_StudioDrawShadow()` + - `VK_RenderDebugLabelEnd()` + +- `R_StudioDrawModelInternal()` + - called from: + - `R_DrawStudioModel()` 3x + - `R_DrawViewModel()` + - `R_RunViewmodelEvents()` + - `VK_RenderDebugLabelBegin()` + - `R_StudioDrawModel()` + - in: + - `RI.currententity` + - `RI.currentmodel` + - `R_StudioSetHeader()` + - sets `m_pStudioHeader` + - `R_StudioSetUpTransform(entity = RI.currententity)` + - `R_StudioLerpMovement(entity)` + - updates entity internal state + - `g_studio.rotationmatrix = Matrix3x4_CreateFromEntity()` + - `VK_RenderDebugLabelEnd()` + +- `VK_StudioDrawModel()` -- called from vk_scene.c + - sets `RI.currententity`, `RI.currentmodel` + - `R_DrawStudioModel()` + - `R_StudioSetupTimings()` -- sets `g_studio.time/frametime` + - `R_StudioDrawModelInternal()` + +# E279 +## Studio model animation +- studiohdr_t + - int numseq -- number of "sequences"? + - int seqindex -- offset to sequences: + `pseqdesc = (mstudioseqdesc_t *)((byte *)pstudiohdr + pstudiohdr->seqindex) + sequence;` +- mstudioseqdesc_t + - int numframes + - int fps +- mstudioanim_t + - = gEngine.R_StudioGetAnim(studiohdr, model, seqdesc) + +- cl_entity_t + - sequence -- references studio model sequence + - animtime/frame -- references animation state within sequence + +# E282 +## Studio model tracking +`m_pStudioHeader` is set from: +- `R_StudioSetHeader()` from: + - EXTERNAL + - `R_StudioDrawModel()` + - `R_StudioDrawPlayer()` +- `R_StudioDrawPlayer()` + +## Detecting static/unchanged studio submodels +### Parse `studiohdr_t` eagerly +Go deeply into sequences, animations, etc and figure out whether vertices will actually change. +Might not catch models which are not being animated right now, i.e. 
current frame is the same as previous one, although it is not guaranteed to be so. +This potentially conflicts with game logic updating bonetransforms manually even though there are no recorded animations in studio file. + +### Detect changes dynamically +Let it process vertices as usual, but then compute hash of vertices values. +Depends on floating point vertices coordinates being bit-perfect same every time, even for moving entities. This is not strictly speaking true because studio model rendering is organized in such a way that bone matrices are pre-multiplied by entity transform matrix. This is done outside of vk_studio.c, and in game dll, which we have no control over. We then undo this multiplication. Given floating point nature of all of this garbage, there will be precision errors and resulting coordinates are not guaranteed to be the same even for completely static models. + +### Lazily detect static models, and draw the rest as fully dynamic with fast build +- Detect simple static cases (one sequence, one frame), and pre-build those. +- For everything else, just build it from scratch every frame w/o caching or anything. +If that is not fast enough, then we can proceed with more involved per-entity caching, BLAS updates, cache eviction, etc. + +TODO: can we not have a BLAS/model for each submodel? Can it be per-model instead? This would need prior knowledge of submodel count, mesh count, vertices and indices counts. (Potentially conflicts with game dll doing weird things, e.g. skipping certain submodels based on whatever game specific logic) + +### Action plan +- [ ] Try to pre-build static studio models. If that fails (e.g. still need dynamic knowledge for the first build), then build it lazily, i.e. when the model is rendered for the first time. + - [ ] Needs tracking of model cache entry whenever `m_pStudioHeader` is set. +- [ ] Add a cache for entities, store all prev_* stuff there. 
+ - [ ] Needs tracking of entity cache entry whenever `RI.currententity` is set. + +- [ ] Alternative model/entity tracking: just check current ptrs in `R_StudioDrawPoints()` and update them if changed. + +# 2023-07-30 +- ~~R_DrawStudioModel is the main func for drawing studio model. Called from scene code for each studio entity, with everything current (RI and stuff) set up~~ +- `R_StudioDrawModelInternal()` is the main one. It is where it splits into renderer-vs-game rendering functions. diff --git a/ref/vk/alolcator.c b/ref/vk/alolcator.c index b7ed036f..456544b0 100644 --- a/ref/vk/alolcator.c +++ b/ref/vk/alolcator.c @@ -108,6 +108,7 @@ typedef struct alo_pool_s { #define DEFAULT_CAPACITY 256 +// TODO make it not a pointer. Just Init struct alo_pool_s* aloPoolCreate(alo_size_t size, int expected_allocations, alo_size_t min_alignment) { alo_pool_t *pool = MALLOC(sizeof(*pool)); block_t *b; @@ -294,6 +295,71 @@ uint32_t aloRingAlloc(alo_ring_t* ring, uint32_t size, uint32_t alignment) { return 0; } +// free--><- allocated +// [a....p|q.r.] +// free-> +// [a....|pq.r.] +// freeing item: +// - swap with first allocated +// [a....r|q.p.] 
+
+// Grows the pool so that it can hand out ints in range [0, new_capacity).
+// Ints added by growing become free. Ints which are allocated at the time of the call stay
+// tracked in the tail of the list: aloIntPoolFree() and aloIntPoolClear() depend on that.
+// Does nothing if new_capacity is not larger than the current capacity.
+void aloIntPoolGrow(alo_int_pool_t *pool, int new_capacity) {
+	if (new_capacity <= pool->capacity)
+		return;
+
+	int *const new_free_list = MALLOC(sizeof(int) * new_capacity);
+	const int new_items = new_capacity - pool->capacity;
+
+	// List layout is [free ints | allocated ints], pool->free is the split position.
+	// Old free ints go first ...
+	for (int i = 0; i < pool->free; ++i)
+		new_free_list[i] = pool->free_list[i];
+
+	// ... then the new ints, in descending order, so that the lowest one is allocated first ...
+	for (int i = 0; i < new_items; ++i)
+		new_free_list[pool->free + i] = new_capacity - i - 1;
+
+	// ... and then the allocated ints, moved to their new tail position.
+	for (int i = pool->free; i < pool->capacity; ++i)
+		new_free_list[new_items + i] = pool->free_list[i];
+
+	if (pool->free_list)
+		FREE(pool->free_list);
+
+	pool->free_list = new_free_list;
+	pool->free += new_items;
+	pool->capacity = new_capacity;
+}
+
+// Returns a free int, or -1 if there are none left.
+int aloIntPoolAlloc(alo_int_pool_t *pool) {
+	if (pool->free == 0)
+		return -1;
+
+	// The popped slot becomes the first entry of the allocated tail
+	pool->free--;
+	return pool->free_list[pool->free];
+}
+
+// Returns val, previously handed out by aloIntPoolAlloc(), back to the pool.
+void aloIntPoolFree(alo_int_pool_t *pool, int val) {
+	ASSERT(pool->free < pool->capacity);
+	ASSERT(val >= 0);
+	ASSERT(val < pool->capacity);
+
+	// Manage allocated tail list: find val there and swap it into the first tail slot,
+	// which then becomes the last slot of the free part
+	for (int i = pool->free; i < pool->capacity; ++i) {
+		if (pool->free_list[i] != val)
+			continue;
+
+		const int tmp = pool->free_list[pool->free];
+		pool->free_list[pool->free] = val;
+		pool->free_list[i] = tmp;
+
+		++pool->free;
+		return;
+	}
+
+	ASSERT(!"Item not found");
+}
+
+// Marks all ints as free.
+void aloIntPoolClear(alo_int_pool_t *pool) {
+	// Depends on the fact that the tail free_list contains properly maintained allocated ints
+	pool->free = pool->capacity;
+}
+
+// Frees the list memory and resets the pool to the empty (zero capacity) state.
+void aloIntPoolDestroy(alo_int_pool_t *pool) {
+	if (pool->free_list)
+		FREE(pool->free_list);
+
+	// Do not leave a dangling list pointer and stale counters behind
+	*pool = (alo_int_pool_t){0};
+}
+
 #if defined(ALOLCATOR_TEST)
 #include
 uint32_t rand_pcg32(uint32_t max) {
diff --git a/ref/vk/alolcator.h b/ref/vk/alolcator.h
index 864f3c0a..c5224e10 100644
--- a/ref/vk/alolcator.h
+++ b/ref/vk/alolcator.h
@@ -35,3 +35,17 @@ uint32_t aloRingAlloc(alo_ring_t* ring, uint32_t size, uint32_t alignment);
 
 // Marks everything up-to-pos as free (expects up-to-pos to be valid)
 void aloRingFree(alo_ring_t* ring, uint32_t up_to_pos);
+
+// Integer pool/freelist
+// Hands out unique integers in range [0, capacity)
+typedef struct alo_int_pool_s {
+	int *free_list; // [free ints | allocated ints], capacity entries in total
+	int capacity;
+	int free; // count of free ints, also the index where the allocated tail starts
+} alo_int_pool_t;
+
+void
aloIntPoolGrow(alo_int_pool_t *pool, int new_capacity); +int aloIntPoolAlloc(alo_int_pool_t *pool); +void aloIntPoolFree(alo_int_pool_t *pool, int); +void aloIntPoolClear(alo_int_pool_t *pool); +void aloIntPoolDestroy(alo_int_pool_t *pool); diff --git a/ref/vk/debugbreak.h b/ref/vk/debugbreak.h new file mode 100644 index 00000000..bfb82884 --- /dev/null +++ b/ref/vk/debugbreak.h @@ -0,0 +1,174 @@ +/* Copyright (c) 2011-2021, Scott Tsai + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef DEBUG_BREAK_H +#define DEBUG_BREAK_H + +#ifdef _MSC_VER + +#define debug_break __debugbreak + +#else + +#ifdef __cplusplus +extern "C" { +#endif + +#define DEBUG_BREAK_USE_TRAP_INSTRUCTION 1 +#define DEBUG_BREAK_USE_BULTIN_TRAP 2 +#define DEBUG_BREAK_USE_SIGTRAP 3 + +#if defined(__i386__) || defined(__x86_64__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__inline__ static void trap_instruction(void) +{ + __asm__ volatile("int $0x03"); +} +#elif defined(__thumb__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +/* FIXME: handle __THUMB_INTERWORK__ */ +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'arm-linux-tdep.c' in GDB source. + * Both instruction sequences below work. */ +#if 1 + /* 'eabi_linux_thumb_le_breakpoint' */ + __asm__ volatile(".inst 0xde01"); +#else + /* 'eabi_linux_thumb2_le_breakpoint' */ + __asm__ volatile(".inst.w 0xf7f0a000"); +#endif + + /* Known problem: + * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. + * 'step' would keep getting stuck on the same instruction. + * + * Workaround: use the new GDB commands 'debugbreak-step' and + * 'debugbreak-continue' that become available + * after you source the script from GDB: + * + * $ gdb -x debugbreak-gdb.py <... 
USUAL ARGUMENTS ...> + * + * 'debugbreak-step' would jump over the breakpoint instruction with + * roughly equivalent of: + * (gdb) set $instruction_len = 2 + * (gdb) tbreak *($pc + $instruction_len) + * (gdb) jump *($pc + $instruction_len) + */ +} +#elif defined(__arm__) && !defined(__thumb__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'arm-linux-tdep.c' in GDB source, + * 'eabi_linux_arm_le_breakpoint' */ + __asm__ volatile(".inst 0xe7f001f0"); + /* Known problem: + * Same problem and workaround as Thumb mode */ +} +#elif defined(__aarch64__) && defined(__APPLE__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_BULTIN_DEBUGTRAP +#elif defined(__aarch64__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'aarch64-tdep.c' in GDB source, + * 'aarch64_default_breakpoint' */ + __asm__ volatile(".inst 0xd4200000"); +} +#elif defined(__powerpc__) + /* PPC 32 or 64-bit, big or little endian */ + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'rs6000-tdep.c' in GDB source, + * 'rs6000_breakpoint' */ + __asm__ volatile(".4byte 0x7d821008"); + + /* Known problem: + * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. + * 'step' stuck on the same instruction ("twge r2,r2"). + * + * The workaround is the same as ARM Thumb mode: use debugbreak-gdb.py + * or manually jump over the instruction. 
 */ +} +#elif defined(__riscv) + /* RISC-V 32 or 64-bit, whether the "C" extension + * for compressed, 16-bit instructions are supported or not */ + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'riscv-tdep.c' in GDB source, + * 'riscv_sw_breakpoint_from_kind' */ + __asm__ volatile(".4byte 0x00100073"); +} +#else + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_SIGTRAP +#endif + + +#ifndef DEBUG_BREAK_IMPL +#error "debugbreak.h is not supported on this target" +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + trap_instruction(); +} +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_BULTIN_DEBUGTRAP +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + __builtin_debugtrap(); +} +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_BULTIN_TRAP +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + __builtin_trap(); +} +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_SIGTRAP +#include <signal.h> +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + raise(SIGTRAP); +} +#else +#error "invalid DEBUG_BREAK_IMPL value" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ifdef _MSC_VER */ + +#endif /* ifndef DEBUG_BREAK_H */ diff --git a/ref/vk/r_block.c b/ref/vk/r_block.c new file mode 100644 index 00000000..8924464a --- /dev/null +++ b/ref/vk/r_block.c @@ -0,0 +1,116 @@ +#include "r_block.h" + +#include "vk_common.h" // ASSERT +#include "vk_core.h" // vk_core.pool + +typedef struct r_blocks_block_s { + int long_index; + uint32_t refcount; +} r_blocks_block_t; + +// logical blocks +// <---- lifetime long -><-- once --> +// [.....................|............] +// <--- pool --><-- ring ---> +// offset ? 
--->
+
+// Picks a free metablock slot. Returns its index, or a negative value if all slots are taken.
+// TODO grow the freelist (and the metablock storage) if needed
+static int allocMetablock(r_blocks_t *blocks) {
+	return aloIntPoolAlloc(&blocks->blocks.freelist);
+}
+
+// Allocates a long-lived block of the given size and alignment from the pool region.
+// The returned block holds a single reference, drop it with R_BlockRelease().
+// On failure returns a block with offset == ALO_ALLOC_FAILED and size == 0.
+r_block_t R_BlockAllocLong(r_blocks_t *blocks, uint32_t size, uint32_t alignment) {
+	r_block_t ret = {
+		.offset = ALO_ALLOC_FAILED,
+		.size = 0,
+		.impl_ = {-1}
+	};
+
+	const alo_block_t ablock = aloPoolAllocate(blocks->long_pool, size, alignment);
+	if (ablock.offset == ALO_ALLOC_FAILED) {
+		gEngine.Con_Reportf(S_ERROR "aloPoolAllocate failed\n");
+		return ret;
+	}
+
+	const int metablock_index = allocMetablock(blocks);
+	if (metablock_index < 0) {
+		gEngine.Con_Reportf(S_ERROR "allocMetablock failed\n");
+		// Give the pool region back, there is no metablock to track it
+		aloPoolFree(blocks->long_pool, ablock.index);
+		return ret;
+	}
+
+	ret.offset = ablock.offset;
+	ret.size = ablock.size;
+	ret.impl_.index = metablock_index;
+	ret.impl_.blocks = blocks;
+
+	r_blocks_block_t *metablock = blocks->blocks.storage + metablock_index;
+	metablock->long_index = ablock.index;
+	metablock->refcount = 1;
+
+	/* gEngine.Con_Reportf("block alloc %dKiB => index=%d offset=%u\n", (int)size/1024, metablock_index, (int)ret.offset); */
+
+	// Count exactly what R_BlockRelease() subtracts later (block->size), so that the counter stays balanced
+	blocks->allocated_long += ret.size;
+	return ret;
+}
+
+// Allocates from the "once" region.
+// Returns the offset relative to the start of the whole range managed by blocks, or ALO_ALLOC_FAILED.
+// There is no handle to release for these allocations, see R_BlocksClearOnce().
+uint32_t R_BlockAllocOnce(r_blocks_t *blocks, uint32_t size, uint32_t alignment) {
+	const uint32_t offset = R_FlippingBuffer_Alloc(&blocks->once.flipping, size, alignment);
+	if (offset == ALO_ALLOC_FAILED)
+		return ALO_ALLOC_FAILED;
+
+	return offset + blocks->once.ring_offset;
+}
+
+// Initializes blocks to manage a range of `size` units:
+// the first (size - once_size) units are for long allocations, the last once_size units are for "once" allocations.
+// expected_allocs is the number of metablock slots, i.e. how many long blocks can be alive at the same time.
+void R_BlocksCreate(r_blocks_t *blocks, uint32_t size, uint32_t once_size, int expected_allocs) {
+	// The once region is carved out of size; the unsigned subtractions below would wrap around otherwise
+	ASSERT(once_size <= size);
+
+	memset(blocks, 0, sizeof(*blocks));
+
+	blocks->size = size;
+	blocks->allocated_long = 0;
+
+	blocks->long_pool = aloPoolCreate(size - once_size, expected_allocs, 4);
+	aloIntPoolGrow(&blocks->blocks.freelist, expected_allocs);
+	blocks->blocks.storage = Mem_Malloc(vk_core.pool, expected_allocs * sizeof(blocks->blocks.storage[0]));
+
+	blocks->once.ring_offset = size - once_size;
+	R_FlippingBuffer_Init(&blocks->once.flipping, once_size);
+}
+
+void R_BlockRelease(const r_block_t *block) { + r_blocks_t *const blocks = block->impl_.blocks; + if (!blocks || !block->size) + return; + + ASSERT(block->impl_.index >= 0); + ASSERT(block->impl_.index < blocks->blocks.freelist.capacity); + + r_blocks_block_t *const metablock = blocks->blocks.storage + block->impl_.index; + + /* gEngine.Con_Reportf("block release index=%d offset=%u refcount=%d\n", block->impl_.index, (int)block->offset, (int)metablock->refcount); */ + + ASSERT (metablock->refcount > 0); + if (--metablock->refcount) + return; + + /* gEngine.Con_Reportf("block free index=%d offset=%u\n", block->impl_.index, (int)block->offset); */ + + aloPoolFree(blocks->long_pool, metablock->long_index); + aloIntPoolFree(&blocks->blocks.freelist, block->impl_.index); + blocks->allocated_long -= block->size; +} + +void R_BlocksDestroy(r_blocks_t *blocks) { + for (int i = blocks->blocks.freelist.free; i < blocks->blocks.freelist.capacity; ++i) { + r_blocks_block_t *b = blocks->blocks.storage + blocks->blocks.freelist.free_list[i]; + ASSERT(b->refcount == 0); + } + + aloPoolDestroy(blocks->long_pool); + aloIntPoolDestroy(&blocks->blocks.freelist); + Mem_Free(blocks->blocks.storage); +} + +// Clear all LifetimeOnce blocks, checking that they are not referenced by anything +void R_BlocksClearOnce(r_blocks_t *blocks) { + R_FlippingBuffer_Flip(&blocks->once.flipping); +} diff --git a/ref/vk/r_block.h b/ref/vk/r_block.h new file mode 100644 index 00000000..15853329 --- /dev/null +++ b/ref/vk/r_block.h @@ -0,0 +1,47 @@ +#pragma once + +#include "r_flipping.h" +#include "alolcator.h" +#include + +struct r_blocks_s; +typedef struct r_block_s { + uint32_t offset; + uint32_t size; + + struct { + int index; + struct r_blocks_s *blocks; + } impl_; +} r_block_t; + +struct r_blocks_block_s; +typedef struct r_blocks_s { + uint32_t size; + + struct alo_pool_s *long_pool; + + struct { + uint32_t ring_offset; + r_flipping_buffer_t flipping; + } once; + + struct { + alo_int_pool_t 
freelist; + struct r_blocks_block_s *storage; + } blocks; + + // This is an estimate, it doesn't count alignment holes + int allocated_long; +} r_blocks_t; + +r_block_t R_BlockAllocLong(r_blocks_t *blocks, uint32_t size, uint32_t alignment); +uint32_t R_BlockAllocOnce(r_blocks_t *blocks, uint32_t size, uint32_t alignment); + +//void R_BlockAcquire(r_block_t *block); +void R_BlockRelease(const r_block_t *block); + +void R_BlocksCreate(r_blocks_t *blocks, uint32_t max_size, uint32_t once_max, int expected_allocs); +void R_BlocksDestroy(r_blocks_t *blocks); + +void R_BlocksClearOnce(r_blocks_t *blocks); diff --git a/ref/vk/r_flipping.h b/ref/vk/r_flipping.h new file mode 100644 index 00000000..f09c0980 --- /dev/null +++ b/ref/vk/r_flipping.h @@ -0,0 +1,17 @@ +#pragma once + +#include "alolcator.h" + +typedef struct { + alo_ring_t ring; + uint32_t frame_offsets[2]; +} r_flipping_buffer_t; + +void R_FlippingBuffer_Init(r_flipping_buffer_t *flibuf, uint32_t size); +uint32_t R_FlippingBuffer_Alloc(r_flipping_buffer_t* flibuf, uint32_t size, uint32_t align); + +// (╯°□°)╯︵ ┻━┻ +void R_FlippingBuffer_Flip(r_flipping_buffer_t* flibuf); + +// ┬─┬ノ( º _ ºノ) +void R_FlippingBuffer_Clear(r_flipping_buffer_t *flibuf); diff --git a/ref/vk/r_speeds.c b/ref/vk/r_speeds.c index fb372b90..97bca106 100644 --- a/ref/vk/r_speeds.c +++ b/ref/vk/r_speeds.c @@ -10,11 +10,13 @@ #include "xash3d_mathlib.h" // Q_min #include -#define MAX_SPEEDS_MESSAGE 1024 -#define MAX_SPEEDS_METRICS (APROF_MAX_SCOPES + 4) +#define MAX_SPEEDS_MESSAGE (1024) +#define MAX_SPEEDS_METRICS (512) #define TARGET_FRAME_TIME (1000.f / 60.f) #define MAX_GRAPHS 8 +#define MODULE_NAME "speeds" + // Valid bits for `r_speeds` argument: enum { SPEEDS_BIT_OFF = 0, // `r_speeds 0` turns off all performance stats display @@ -28,17 +30,22 @@ enum { typedef struct { int *p_value; - const char *name; + qboolean reset; + char name[64]; + const char *var_name; + const char *src_file; + int src_line; r_speeds_metric_type_t type; int 
low_watermark, high_watermark, max_value; int graph_index; } r_speeds_metric_t; typedef struct { + char name[64]; float *data; int data_count; int data_write; - int source_metric; + int source_metric; // can be -1 for missing metrics int height; int max_value; // Computed automatically every frame @@ -75,7 +82,13 @@ static struct { int time_us; // automatically zeroed by metrics each frame } gpu_scopes[MAX_GPU_SCOPES]; char message[MAX_SPEEDS_MESSAGE]; + + qboolean list_metrics; + string list_metrics_filter; } frame; + + // Mask g_speeds_graphs cvar writes + char graphs_list[1024]; } g_speeds; static void speedsStrcat( const char *msg ) { @@ -84,8 +97,8 @@ static void speedsStrcat( const char *msg ) { static void speedsPrintf( const char *msg, ... ) _format(1); static void speedsPrintf( const char *msg, ... ) { - va_list argptr; - char text[MAX_SPEEDS_MESSAGE]; + va_list argptr; + char text[MAX_SPEEDS_MESSAGE]; va_start( argptr, msg ); Q_vsnprintf( text, sizeof( text ), msg, argptr ); @@ -201,7 +214,8 @@ static void drawCPUProfilerScopes(int draw, const aprof_event_t *events, uint64_ const uint64_t delta_ns = timestamp_ns - stack[depth].begin_ns; if (!g_speeds.frame.scopes[scope_id].initialized) { - R_SpeedsRegisterMetric(&g_speeds.frame.scopes[scope_id].time_us, scope->name, kSpeedsMetricMicroseconds); + R_SpeedsRegisterMetric(&g_speeds.frame.scopes[scope_id].time_us, "scope", scope->name, kSpeedsMetricMicroseconds, /*reset*/ true, scope->name, __FILE__, __LINE__); + g_speeds.frame.scopes[scope_id].initialized = 1; } @@ -268,6 +282,38 @@ static void handlePause( uint32_t prev_frame_index ) { } } +// TODO move this to vk_common or something +int stringViewCmp(const_string_view_t sv, const char* s) { + for (int i = 0; i < sv.len; ++i) { + const int d = sv.s[i] - s[i]; + if (d != 0) + return d; + if (s[i] == '\0') + return 1; + } + + // Check that both strings end the same + return '\0' - s[sv.len]; +} + +static int findMetricIndexByName( const_string_view_t name) { 
+ for (int i = 0; i < g_speeds.metrics_count; ++i) { + if (stringViewCmp(name, g_speeds.metrics[i].name) == 0) + return i; + } + + return -1; +} + +static int findGraphIndexByName( const_string_view_t name) { + for (int i = 0; i < g_speeds.graphs_count; ++i) { + if (stringViewCmp(name, g_speeds.graphs[i].name) == 0) + return i; + } + + return -1; +} + static int drawGraph( r_speeds_graph_t *const graph, int frame_bar_y ) { const int min_width = 100 * g_speeds.font_metrics.scale; const int graph_width = clampi32( @@ -277,6 +323,22 @@ static int drawGraph( r_speeds_graph_t *const graph, int frame_bar_y ) { min_width, vk_frame.width); // clamp to min_width..frame_width const int graph_height = graph->height * g_speeds.font_metrics.scale; + if (graph->source_metric < 0) { + // Check whether this metric has been registered + const int metric_index = findMetricIndexByName((const_string_view_t){graph->name, Q_strlen(graph->name)}); + + if (metric_index >= 0) { + graph->source_metric = metric_index; + g_speeds.metrics[metric_index].graph_index = graph - g_speeds.graphs; + } else { + const char *name = graph->name; + rgba_t text_color = {0xff, 0x00, 0x00, 0xff}; + gEngine.Con_DrawString(0, frame_bar_y, name, text_color); + frame_bar_y += g_speeds.font_metrics.glyph_height; + return frame_bar_y; + } + } + const r_speeds_metric_t *const metric = g_speeds.metrics + graph->source_metric; const int graph_max_value = metric->max_value ? 
Q_max(metric->max_value, graph->max_value) : graph->max_value; @@ -318,10 +380,9 @@ static int drawGraph( r_speeds_graph_t *const graph, int frame_bar_y ) { int max_value = INT_MIN; const qboolean watermarks = metric->low_watermark && metric->high_watermark; for (int i = 0; i < graph->data_count; ++i) { - int value = Q_max(0, graph->data[(graph->data_write + i) % graph->data_count]); - max_value = Q_max(max_value, value); - - value = Q_min(graph_max_value, value); + const int raw_value = Q_max(0, graph->data[(graph->data_write + i) % graph->data_count]); + max_value = Q_max(max_value, raw_value); + const int value = Q_min(graph_max_value, raw_value); int red = 0xed, green = 0x9f, blue = 0x01; if (watermarks) { @@ -337,7 +398,14 @@ static int drawGraph( r_speeds_graph_t *const graph, int frame_bar_y ) { const int height = watermarks ? y_pos : 2 * g_speeds.font_metrics.scale; const int y = frame_bar_y - y_pos; + // TODO lines CL_FillRGBA(x0, y, x1-x0, height, red, green, blue, 127); + + if (i == graph->data_count - 1) { + char buf[16]; + metricTypeSnprintf(buf, sizeof(buf), raw_value, metric->type); + gEngine.Con_DrawString(x1, y - g_speeds.font_metrics.glyph_height / 2, buf, text_color); + } } graph->max_value = max_value ? 
max_value : 1; @@ -360,7 +428,7 @@ static void drawGPUProfilerScopes(qboolean draw, int y, uint64_t frame_begin_tim const char *name = gpurofl->scopes[scope_index].name; if (!g_speeds.frame.gpu_scopes[scope_index].initialized) { - R_SpeedsRegisterMetric(&g_speeds.frame.gpu_scopes[scope_index].time_us, name, kSpeedsMetricMicroseconds); + R_SpeedsRegisterMetric(&g_speeds.frame.gpu_scopes[scope_index].time_us, "gpuscope", name, kSpeedsMetricMicroseconds, /*reset*/ true, name, __FILE__, __LINE__); g_speeds.frame.gpu_scopes[scope_index].initialized = 1; } @@ -452,10 +520,11 @@ static void printMetrics( void ) { } } -static void clearMetrics( void ) { +static void resetMetrics( void ) { for (int i = 0; i < g_speeds.metrics_count; ++i) { const r_speeds_metric_t *const metric = g_speeds.metrics + i; - *metric->p_value = 0; + if (metric->reset) + *metric->p_value = 0; } } @@ -475,7 +544,10 @@ static void getCurrentFontMetrics(void) { static int drawGraphs( int y ) { for (int i = 0; i < g_speeds.graphs_count; ++i) { r_speeds_graph_t *const graph = g_speeds.graphs + i; - graph->data[graph->data_write] = *g_speeds.metrics[graph->source_metric].p_value; + + if (graph->source_metric >= 0) + graph->data[graph->data_write] = *g_speeds.metrics[graph->source_metric].p_value; + graph->data_write = (graph->data_write + 1) % graph->data_count; y = drawGraph(graph, y) + 10; } @@ -483,7 +555,6 @@ static int drawGraphs( int y ) { return y; } - static void togglePause( void ) { if (g_speeds.paused_events) { Mem_Free(g_speeds.paused_events); @@ -495,31 +566,36 @@ static void togglePause( void ) { } } -typedef struct { - const char *s; - int len; -} const_string_view_t; +static void speedsGraphAdd(const_string_view_t name, int metric_index) { + gEngine.Con_Printf("Adding profiler graph for metric %.*s(%d) at graph index %d\n", name.len, name.s, metric_index, g_speeds.graphs_count); -static int stringViewCmp(const_string_view_t sv, const char* s) { - for (int i = 0; i < sv.len; ++i) { - const 
int d = sv.s[i] - s[i]; - if (d != 0) - return d; - if (s[i] == '\0') - return 1; + if (g_speeds.graphs_count == MAX_GRAPHS) { + gEngine.Con_Printf(S_ERROR "Cannot add graph \"%.*s\", no free graphs slots (max=%d)\n", name.len, name.s, MAX_GRAPHS); + return; } - // Check that both strings end the same - return '\0' - s[sv.len]; -} - -static int findMetricIndexByName( const_string_view_t name) { - for (int i = 0; i < g_speeds.metrics_count; ++i) { - if (stringViewCmp(name, g_speeds.metrics[i].name) == 0) - return i; + if (metric_index >= 0) { + r_speeds_metric_t *const metric = g_speeds.metrics + metric_index; + metric->graph_index = g_speeds.graphs_count; } - return -1; + r_speeds_graph_t *const graph = g_speeds.graphs + g_speeds.graphs_count++; + + // TODO make these customizable + graph->data_count = 256; + graph->height = 100; + graph->max_value = 1; // Will be computed automatically on first frame + graph->color[3] = 255; + + const int len = Q_min(name.len, sizeof(graph->name) - 1); + memcpy(graph->name, name.s, len); + graph->name[len] = '\0'; + getColorForString(graph->name, graph->color); + + ASSERT(!graph->data); + graph->data = Mem_Calloc(vk_core.pool, graph->data_count * sizeof(float)); + graph->data_write = 0; + graph->source_metric = metric_index; } static void speedsGraphAddByMetricName( const_string_view_t name ) { @@ -535,40 +611,57 @@ static void speedsGraphAddByMetricName( const_string_view_t name ) { return; } - if (g_speeds.graphs_count == MAX_GRAPHS) { - gEngine.Con_Printf(S_ERROR "Cannot add graph for metric \"%.*s\", no free graphs slots (max=%d)\n", name.len, name.s, MAX_GRAPHS); + speedsGraphAdd( name, metric_index ); +} + +static void speedsGraphDelete( r_speeds_graph_t *graph ) { + ASSERT(graph->data); + Mem_Free(graph->data); + graph->data = NULL; + graph->name[0] = '\0'; + + if (graph->source_metric >= 0) { + ASSERT(graph->source_metric < g_speeds.metrics_count); + r_speeds_metric_t *const metric = g_speeds.metrics + 
graph->source_metric; + metric->graph_index = -1; + } + + graph->source_metric = -1; +} + +static void speedsGraphRemoveByName( const_string_view_t name ) { + const int graph_index = findGraphIndexByName(name); + if (graph_index < 0) { + gEngine.Con_Printf(S_ERROR "Graph \"%.*s\" not found\n", name.len, name.s); return; } - gEngine.Con_Printf("Adding profiler graph for metric %.*s(%d) at graph index %d\n", name.len, name.s, metric_index, g_speeds.graphs_count); + r_speeds_graph_t *const graph = g_speeds.graphs + graph_index; + speedsGraphDelete( graph ); - metric->graph_index = g_speeds.graphs_count++; - r_speeds_graph_t *const graph = g_speeds.graphs + metric->graph_index; + gEngine.Con_Printf("Removing profiler graph %.*s(%d) at graph index %d\n", name.len, name.s, graph->source_metric, graph_index); - // TODO make these customizable - graph->data_count = 256; - graph->height = 100; - graph->max_value = 1; // Will be computed automatically on first frame - graph->color[3] = 255; - getColorForString(metric->name, graph->color); + // Move all further graphs one slot back, also updating their indices + for (int i = graph_index + 1; i < g_speeds.graphs_count; ++i) { + r_speeds_graph_t *const dst = g_speeds.graphs + i - 1; + const r_speeds_graph_t *const src = g_speeds.graphs + i; - ASSERT(!graph->data); - graph->data = Mem_Calloc(vk_core.pool, graph->data_count * sizeof(float)); - graph->data_write = 0; - graph->source_metric = metric_index; + if (src->source_metric >= 0) { + ASSERT(src->source_metric < g_speeds.metrics_count); + g_speeds.metrics[src->source_metric].graph_index--; + } + + memcpy(dst, src, sizeof(r_speeds_graph_t)); + } + + g_speeds.graphs_count--; } static void speedsGraphsRemoveAll( void ) { gEngine.Con_Printf("Removing all %d profiler graphs\n", g_speeds.graphs_count); for (int i = 0; i < g_speeds.graphs_count; ++i) { r_speeds_graph_t *const graph = g_speeds.graphs + i; - ASSERT(graph->data); - Mem_Free(graph->data); - graph->data = NULL; - - 
ASSERT(graph->source_metric >= 0); - ASSERT(graph->source_metric < g_speeds.metrics_count); - g_speeds.metrics[graph->source_metric].graph_index = -1; + speedsGraphDelete(graph); } g_speeds.graphs_count = 0; @@ -578,6 +671,9 @@ static void processGraphCvar( void ) { if (!(g_speeds.r_speeds_graphs->flags & FCVAR_CHANGED)) return; + if (0 == Q_strcmp(g_speeds.r_speeds_graphs->string, g_speeds.graphs_list)) + return; + // TODO only remove graphs that are not present in the new list speedsGraphsRemoveAll(); @@ -585,7 +681,13 @@ static void processGraphCvar( void ) { while (*p) { const char *next = Q_strchrnul(p, ','); const const_string_view_t name = {p, next - p}; - speedsGraphAddByMetricName( name ); + + const int metric_index = findMetricIndexByName(name); + if (metric_index < 0) { + gEngine.Con_Printf(S_WARN "Metric \"%.*s\" not found (yet? can be registered later)\n", name.len, name.s); + } + + speedsGraphAdd( name, metric_index ); if (!*next) break; p = next + 1; @@ -594,16 +696,129 @@ static void processGraphCvar( void ) { g_speeds.r_speeds_graphs->flags &= ~FCVAR_CHANGED; } +static const char *getMetricTypeName(r_speeds_metric_type_t type) { + switch (type) { + case kSpeedsMetricCount: return "count"; + case kSpeedsMetricMicroseconds: return "ms"; + case kSpeedsMetricBytes: return "bytes"; + } + + return "UNKNOWN"; +} + +// Ideally, we'd just autocomplete the r_speeds_graphs cvar/cmd. +// However, autocompletion is not exposed to the renderer. It is completely internal to the engine, see con_utils.c, var cmd_list. 
+static void listMetrics( void ) { + if (gEngine.Cmd_Argc() > 1) { + Q_strncpy(g_speeds.frame.list_metrics_filter, gEngine.Cmd_Argv(1), sizeof(g_speeds.frame.list_metrics_filter)); + } else { + g_speeds.frame.list_metrics_filter[0] = '\0'; + } + + g_speeds.frame.list_metrics = true; +} + +static void doListMetrics( void ) { + if (!g_speeds.frame.list_metrics) + return; + + g_speeds.frame.list_metrics = false; + const char *const filter = g_speeds.frame.list_metrics_filter; + + for (int i = 0; i < g_speeds.metrics_count; ++i) { + const r_speeds_metric_t *metric = g_speeds.metrics + i; + + if (filter[0] && !Q_strstr(metric->name, filter)) + continue; + + char buf[16]; + metricTypeSnprintf(buf, sizeof(buf), *metric->p_value, metric->type); + gEngine.Con_Printf(" ^2%s^7 %s, value = ^3%s^7 (^5%s^7, ^6%s:%d^7)\n", metric->name, getMetricTypeName(metric->type), buf, metric->var_name, metric->src_file, metric->src_line); + } +} + +static void graphCmd( void ) { + enum { Unknown, Add, Remove, Clear } action = Unknown; + + const int argc = gEngine.Cmd_Argc(); + + if (argc > 1) { + const char *const cmd = gEngine.Cmd_Argv(1); + if (0 == Q_strcmp("add", cmd) && argc > 2) + action = Add; + else if (0 == Q_strcmp("del", cmd) && argc > 2) + action = Remove; + else if (0 == Q_strcmp("clear", cmd)) + action = Clear; + } + + + switch (action) { + case Add: + for (int i = 2; i < argc; ++i) { + const char *const arg = gEngine.Cmd_Argv(i); + const const_string_view_t name = {arg, Q_strlen(arg) }; + speedsGraphAddByMetricName( name ); + } + break; + case Remove: + for (int i = 2; i < argc; ++i) { + const char *const arg = gEngine.Cmd_Argv(i); + const const_string_view_t name = {arg, Q_strlen(arg) }; + speedsGraphRemoveByName( name ); + } + break; + case Clear: + speedsGraphsRemoveAll(); + break; + case Unknown: + gEngine.Con_Printf("Usage:\n%s metric0 metric1 ...\n", gEngine.Cmd_Argv(0)); + gEngine.Con_Printf("\t%s metric0 metric1 ...\n", gEngine.Cmd_Argv(0)); + gEngine.Con_Printf("\t%s 
clear\n", gEngine.Cmd_Argv(0)); + return; + } + + // update cvar + { + const int len = sizeof(g_speeds.graphs_list) - 1; + char *const buf = g_speeds.graphs_list; + + buf[0] = '\0'; + int off = 0; + for (int i = 0; i < g_speeds.graphs_count; ++i) { + const r_speeds_graph_t *const graph = g_speeds.graphs + i; + + if (off) + buf[off++] = ','; + + //gEngine.Con_Reportf("buf='%s' off=%d %s(%d)\n", buf, off, graph->name, (int)Q_strlen(graph->name)); + + const char *s = graph->name; + while (off < len && *s) + buf[off++] = *s++; + + buf[off] = '\0'; + + if (off >= len - 1) + break; + } + + gEngine.Cvar_Set("r_speeds_graphs", buf); + } +} + void R_SpeedsInit( void ) { g_speeds.r_speeds_graphs = gEngine.Cvar_Get("r_speeds_graphs", "", FCVAR_GLCONFIG, "List of metrics to plot as graphs, separated by commas"); g_speeds.r_speeds_graphs_width = gEngine.Cvar_Get("r_speeds_graphs_width", "", FCVAR_GLCONFIG, "Graphs width in pixels"); gEngine.Cmd_AddCommand("r_speeds_toggle_pause", togglePause, "Toggle frame profiler pause"); + gEngine.Cmd_AddCommand("r_speeds_list_metrics", listMetrics, "List all registered metrics"); + gEngine.Cmd_AddCommand("r_speeds_graph", graphCmd, "Manipulate add/remove metrics graphs"); - R_SpeedsRegisterMetric(&g_speeds.frame.frame_time_us, "frame", kSpeedsMetricMicroseconds); - R_SpeedsRegisterMetric(&g_speeds.frame.cpu_time_us, "cpu", kSpeedsMetricMicroseconds); - R_SpeedsRegisterMetric(&g_speeds.frame.cpu_wait_time_us, "cpu_wait", kSpeedsMetricMicroseconds); - R_SpeedsRegisterMetric(&g_speeds.frame.gpu_time_us, "gpu", kSpeedsMetricMicroseconds); + R_SPEEDS_COUNTER(g_speeds.frame.frame_time_us, "frame", kSpeedsMetricMicroseconds); + R_SPEEDS_COUNTER(g_speeds.frame.cpu_time_us, "cpu", kSpeedsMetricMicroseconds); + R_SPEEDS_COUNTER(g_speeds.frame.cpu_wait_time_us, "cpu_wait", kSpeedsMetricMicroseconds); + R_SPEEDS_COUNTER(g_speeds.frame.gpu_time_us, "gpu", kSpeedsMetricMicroseconds); } // grab r_speeds message @@ -624,13 +839,19 @@ qboolean 
R_SpeedsMessage( char *out, size_t size ) return true; } -void R_SpeedsRegisterMetric(int* p_value, const char *name, r_speeds_metric_type_t type) { +void R_SpeedsRegisterMetric(int* p_value, const char *module, const char *name, r_speeds_metric_type_t type, qboolean reset, const char *var_name, const char *file, int line) { ASSERT(g_speeds.metrics_count < MAX_SPEEDS_METRICS); r_speeds_metric_t *metric = g_speeds.metrics + (g_speeds.metrics_count++); metric->p_value = p_value; - metric->name = name; + metric->reset = reset; + + Q_snprintf(metric->name, sizeof(metric->name), "%s.%s", module, name); + metric->type = type; + metric->src_file = file; + metric->src_line = line; + metric->var_name = var_name; metric->graph_index = -1; // TODO how to make universally adjustable? @@ -703,7 +924,9 @@ void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scope processGraphCvar(); - clearMetrics(); + doListMetrics(); + + resetMetrics(); APROF_SCOPE_END(function); } diff --git a/ref/vk/r_speeds.h b/ref/vk/r_speeds.h index c6f88f57..4c6c671a 100644 --- a/ref/vk/r_speeds.h +++ b/ref/vk/r_speeds.h @@ -16,4 +16,15 @@ typedef enum { kSpeedsMetricMicroseconds, } r_speeds_metric_type_t; -void R_SpeedsRegisterMetric(int* p_value, const char *name, r_speeds_metric_type_t type); +// TODO upper limit argument +void R_SpeedsRegisterMetric(int* p_value, const char *module, const char *name, r_speeds_metric_type_t type, qboolean reset, const char *var_name, const char *file, int line); + +// A counter is a value accumulated during a single frame, and reset to zero between frames. +// Examples: drawn models count, scope times, etc. +#define R_SPEEDS_COUNTER(var, name, type) \ + R_SpeedsRegisterMetric(&(var), MODULE_NAME, name, type, /*reset*/ true, #var, __FILE__, __LINE__) + +// A metric is computed and preserved across frame boundaries. +// Examples: total allocated memory, cache sizes, etc. 
+#define R_SPEEDS_METRIC(var, name, type) \ + R_SpeedsRegisterMetric(&(var), MODULE_NAME, name, type, /*reset*/ false, #var, __FILE__, __LINE__) diff --git a/ref/vk/shaders/brush.vert b/ref/vk/shaders/brush.vert index 86d045fe..7ad33ca3 100644 --- a/ref/vk/shaders/brush.vert +++ b/ref/vk/shaders/brush.vert @@ -19,6 +19,7 @@ layout(location=4) out vec4 vColor; void main() { vPos = aPos.xyz; + // FIXME mul by normal matrix vNormal = aNormal; vTexture0 = aTexture0; vLightmapUV = aLightmapUV; diff --git a/ref/vk/shaders/denoiser.comp b/ref/vk/shaders/denoiser.comp index df8c2160..c7f584e6 100644 --- a/ref/vk/shaders/denoiser.comp +++ b/ref/vk/shaders/denoiser.comp @@ -216,9 +216,6 @@ void main() { vec3 colour = vec3(0.); { - // DEBUG motion vectors - //colour = vec3(length(imageLoad(position_t, pix).rgb - imageLoad(prev_position_t, pix).rgb)); - // TODO: need to extract reprojecting from this shader because reprojected stuff need svgf denoising pass after it const vec3 origin = (ubo.ubo.inv_view * vec4(0., 0., 0., 1.)).xyz; const float depth = length(origin - imageLoad(position_t, pix).xyz); diff --git a/ref/vk/shaders/ray_interop.h b/ref/vk/shaders/ray_interop.h index bb6bf276..5124b260 100644 --- a/ref/vk/shaders/ray_interop.h +++ b/ref/vk/shaders/ray_interop.h @@ -69,6 +69,7 @@ LIST_SPECIALIZATION_CONSTANTS(DECLARE_SPECIALIZATION_CONSTANT) #define MATERIAL_MODE_BLEND_ADD 3 #define MATERIAL_MODE_BLEND_MIX 4 #define MATERIAL_MODE_BLEND_GLOW 5 +#define MATERIAL_MODE_COUNT 6 #define TEX_BASE_SKYBOX 0xffffffffu @@ -100,16 +101,16 @@ struct ModelHeader { }; struct Kusok { - // Geometry data + // Geometry data, static uint index_offset; uint vertex_offset; - uint triangles; // material below consists of scalar fields only, so it's not aligned to vec4. // Alignt it here to vec4 explicitly, so that later vector fields are properly aligned (for simplicity). - uint _padding0; + uint _padding0[2]; // Per-kusok because individual surfaces can be patched + // TODO? 
still move to material, or its own table? As this can be dynamic vec3 emissive; PAD(1) diff --git a/ref/vk/vk_beams.c b/ref/vk/vk_beams.c index a67eceb6..07fafc72 100644 --- a/ref/vk/vk_beams.c +++ b/ref/vk/vk_beams.c @@ -16,6 +16,7 @@ #include "beamdef.h" #define NOISE_DIVISIONS 64 // don't touch - many tripmines cause the crash when it equal 128 +#define MODULE_NAME "beams" typedef struct { @@ -31,7 +32,7 @@ static struct { } g_beam; qboolean R_BeamInit(void) { - R_SpeedsRegisterMetric(&g_beam.stats.beams, "beams_count", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_beam.stats.beams, "count", kSpeedsMetricCount); return true; } @@ -174,19 +175,14 @@ static void TriBrightness( float brightness ) { TriColor4f( brightness, brightness, brightness, 1.f ); } -static void R_DrawSegs( vec3_t source, vec3_t delta, float width, float scale, float freq, float speed, int segments, int flags, const vec4_t color, int texture, int render_mode ) +static void R_DrawSegs( vec3_t source, vec3_t delta, float width, float scale, float freq, float speed, int segments, int flags, const vec4_t color ) { int noiseIndex, noiseStep; int i, total_segs, segs_drawn; float div, length, fraction, factor; float flMaxWidth, vLast, vStep, brightness; - vec3_t perp1, vLastNormal = {0}; - beamseg_t curSeg = {0}; - int total_vertices = 0; - int total_indices = 0; - r_geometry_buffer_lock_t buffer; - vk_vertex_t *dst_vtx; - uint16_t *dst_idx; + vec3_t perp1, vLastNormal; + beamseg_t curSeg; if( segments < 2 ) return; @@ -240,17 +236,7 @@ static void R_DrawSegs( vec3_t source, vec3_t delta, float width, float scale, f total_segs = segments; segs_drawn = 0; - total_vertices = (total_segs - 1) * 2 + 2; - total_indices = (total_vertices - 2) * 3; // STRIP unrolled into LIST (TODO get rid of this) - ASSERT(total_vertices < UINT16_MAX ); - - if (!R_GeometryBufferAllocAndLock( &buffer, total_vertices, total_indices, LifetimeSingleFrame )) { - gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for beam\n"); - 
return; - } - - dst_vtx = buffer.vertices.ptr; - dst_idx = buffer.indices.ptr; + TriBegin( TRI_TRIANGLE_STRIP ); // specify all the segments. for( i = 0; i < segments; i++ ) @@ -289,7 +275,7 @@ static void R_DrawSegs( vec3_t source, vec3_t delta, float width, float scale, f nextSeg.width = width * 2.0f; nextSeg.texcoord = vLast; - if( segs_drawn > 0 ) + if( segs_drawn > 0 ) { // Get a vector that is perpendicular to us and perpendicular to the beam. // This is used to fatten the beam. @@ -315,21 +301,15 @@ static void R_DrawSegs( vec3_t source, vec3_t delta, float width, float scale, f VectorMA( curSeg.pos, ( curSeg.width * 0.5f ), vAveNormal, vPoint1 ); VectorMA( curSeg.pos, (-curSeg.width * 0.5f ), vAveNormal, vPoint2 ); - dst_vtx->lm_tc[0] = dst_vtx->lm_tc[1] = 0.f; - dst_vtx->gl_tc[0] = 0.0f; - dst_vtx->gl_tc[1] = curSeg.texcoord; - applyBrightness( brightness, dst_vtx->color ); - VectorCopy( vPoint1, dst_vtx->pos ); - VectorCopy( vAveNormal, dst_vtx->normal ); - ++dst_vtx; + TriTexCoord2f( 0.0f, curSeg.texcoord ); + TriBrightness( brightness ); + TriNormal3fv( vAveNormal ); + TriVertex3fv( vPoint1 ); - dst_vtx->lm_tc[0] = dst_vtx->lm_tc[1] = 0.f; - dst_vtx->gl_tc[0] = 1.0f; - dst_vtx->gl_tc[1] = curSeg.texcoord; - applyBrightness( brightness, dst_vtx->color ); - VectorCopy( vPoint2, dst_vtx->pos ); - VectorCopy( vAveNormal, dst_vtx->normal ); - ++dst_vtx; + TriTexCoord2f( 1.0f, curSeg.texcoord ); + TriBrightness( brightness ); + TriNormal3fv( vAveNormal ); + TriVertex3fv( vPoint2 ); } curSeg = nextSeg; @@ -349,71 +329,29 @@ static void R_DrawSegs( vec3_t source, vec3_t delta, float width, float scale, f brightness = 1.0f - fraction; } - if( segs_drawn == total_segs ) + if( segs_drawn == total_segs ) { // draw the last segment VectorMA( curSeg.pos, ( curSeg.width * 0.5f ), vLastNormal, vPoint1 ); VectorMA( curSeg.pos, (-curSeg.width * 0.5f ), vLastNormal, vPoint2 ); - dst_vtx->lm_tc[0] = dst_vtx->lm_tc[1] = 0.f; - dst_vtx->gl_tc[0] = 0.0f; - dst_vtx->gl_tc[1] = 
curSeg.texcoord; - applyBrightness( brightness, dst_vtx->color ); - VectorCopy( vPoint1, dst_vtx->pos ); - VectorCopy( vLastNormal, dst_vtx->normal ); - ++dst_vtx; + // specify the points. + TriTexCoord2f( 0.0f, curSeg.texcoord ); + TriBrightness( brightness ); + TriNormal3fv( vLastNormal ); + TriVertex3fv( vPoint1 ); - dst_vtx->lm_tc[0] = dst_vtx->lm_tc[1] = 0.f; - dst_vtx->gl_tc[0] = 1.0f; - dst_vtx->gl_tc[1] = curSeg.texcoord; - applyBrightness( brightness, dst_vtx->color ); - VectorCopy( vPoint2, dst_vtx->pos ); - VectorCopy( vLastNormal, dst_vtx->normal ); - ++dst_vtx; + TriTexCoord2f( 1.0f, curSeg.texcoord ); + TriBrightness( brightness ); + TriNormal3fv( vLastNormal ); + TriVertex3fv( vPoint2 ); } vLast += vStep; // Advance texture scroll (v axis only) noiseIndex += noiseStep; } - for (int i = 2; i < total_vertices; ++i) { - if( i & 1 ) - { - // draw triangle [n-1 n-2 n] - dst_idx[(i-2)*3+0] = i - 1; - dst_idx[(i-2)*3+1] = i - 2; - dst_idx[(i-2)*3+2] = i; - } - else - { - // draw triangle [n-2 n-1 n] - dst_idx[(i-2)*3+0] = i - 2; - dst_idx[(i-2)*3+1] = i - 1; - dst_idx[(i-2)*3+2] = i; - } - } - - R_GeometryBufferUnlock( &buffer ); - - { - const vk_render_geometry_t geometry = { - .texture = texture, - .material = kXVkMaterialRegular, - - .max_vertex = total_vertices, - .vertex_offset = buffer.vertices.unit_offset, - - .element_count = total_indices, - .index_offset = buffer.indices.unit_offset, - - .emissive = { color[0], color[1], color[2] }, - }; - - vk_render_type_e render_type = render_mode == kRenderNormal ? 
kVkRenderTypeSolid : kVkRenderType_A_1_R; - VK_RenderModelDynamicBegin( render_type, color, m_matrix4x4_identity, "beam" /* TODO its name */ ); - VK_RenderModelDynamicAddGeometry( &geometry ); - VK_RenderModelDynamicCommit(); - } + TriEndEx(color, "beam segs"); } static void R_DrawTorus( vec3_t source, vec3_t delta, float width, float scale, float freq, float speed, int segments, const vec4_t color ) @@ -1144,7 +1082,7 @@ void R_BeamDraw( BEAM *pbeam, float frametime ) break; case TE_BEAMPOINTS: case TE_BEAMHOSE: - R_DrawSegs( pbeam->source, pbeam->delta, pbeam->width, pbeam->amplitude, pbeam->freq, pbeam->speed, pbeam->segments, pbeam->flags, color, texturenum, render_mode ); + R_DrawSegs( pbeam->source, pbeam->delta, pbeam->width, pbeam->amplitude, pbeam->freq, pbeam->speed, pbeam->segments, pbeam->flags, color ); break; case TE_BEAMFOLLOW: R_DrawBeamFollow( pbeam, frametime, color ); diff --git a/ref/vk/vk_brush.c b/ref/vk/vk_brush.c index e64a6133..5c79ad0c 100644 --- a/ref/vk/vk_brush.c +++ b/ref/vk/vk_brush.c @@ -13,8 +13,8 @@ #include "vk_geometry.h" #include "vk_light.h" #include "vk_mapents.h" -#include "vk_previous_frame.h" #include "r_speeds.h" +#include "vk_logs.h" #include "ref_params.h" #include "eiface.h" @@ -22,25 +22,60 @@ #include #include +#define MODULE_NAME "brush" +#define LOG_MODULE LogModule_Brush + typedef struct vk_brush_model_s { + model_t *engine_model; + + r_geometry_range_t geometry; + vk_render_model_t render_model; - int num_water_surfaces; int *surface_to_geometry_index; int *animated_indexes; int animated_indexes_count; + + matrix4x4 prev_transform; + float prev_time; + + struct { + int surfaces_count; + const int *surfaces_indices; + + r_geometry_range_t geometry; + vk_render_model_t render_model; + } water; } vk_brush_model_t; +typedef struct { + int num_surfaces, num_vertices, num_indices; + int max_texture_id; + int water_surfaces; + int animated_count; + + int water_vertices; + int water_indices; +} model_sizes_t; + static 
struct { struct { - int num_vertices, num_indices; - + int total_vertices, total_indices; int models_drawn; int water_surfaces_drawn; int water_polys_drawn; } stat; int rtable[MOD_FRAMES][MOD_FRAMES]; + + // Unfortunately the engine only tracks the toplevel worldmodel. *xx submodels, while having their own entities and models, are not lifetime-tracked. + // I.e. the engine doesn't call Mod_ProcessRenderData() on them, so we don't directly know when to create or destroy them. + // Therefore, we need to track them manually and destroy them based on some other external event, e.g. Mod_ProcessRenderData(worldmodel) + vk_brush_model_t *models[MAX_MODELS]; + int models_count; + +#define MAX_ANIMATED_TEXTURES 256 + int updated_textures[MAX_ANIMATED_TEXTURES]; } g_brush; void VK_InitRandomTable( void ) @@ -65,9 +100,9 @@ qboolean VK_BrushInit( void ) { VK_InitRandomTable (); - R_SpeedsRegisterMetric(&g_brush.stat.models_drawn, "models_brush", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_brush.stat.water_surfaces_drawn, "water_surfaces", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_brush.stat.water_polys_drawn, "water_polys", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_brush.stat.models_drawn, "drawn", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_brush.stat.water_surfaces_drawn, "water.surfaces", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_brush.stat.water_polys_drawn, "water.polys", kSpeedsMetricCount); return true; } @@ -85,91 +120,79 @@ static const float r_turbsin[] = #define SUBDIVIDE_SIZE 64 #define TURBSCALE ( 256.0f / ( M_PI2 )) -/* -============= -EmitWaterPolys +static void addWarpVertIndCounts(const msurface_t *warp, int *num_vertices, int *num_indices) { + for( glpoly_t *p = warp->polys; p; p = p->next ) { + const int triangles = p->numverts - 2; + *num_vertices += p->numverts; + *num_indices += triangles * 3; + } +} -Does a water warp on the pre-fragmented glpoly_t chain -============= -*/ -static void EmitWaterPolys( const cl_entity_t *ent, const msurface_t 
*warp, qboolean reverse ) -{ +typedef struct { + float prev_time; + float scale; + const msurface_t *warp; + qboolean reverse; + + vk_vertex_t *dst_vertices; + uint16_t *dst_indices; + vk_render_geometry_t *dst_geometry; + + int *out_vertex_count, *out_index_count; +} compute_water_polys_t; + +static void brushComputeWaterPolys( compute_water_polys_t args ) { const float time = gpGlobals->time; - float *v, nv, waveHeight; - float prev_nv, prev_time; - float s, t, os, ot; - glpoly_t *p; - int i; - int num_vertices = 0, num_indices = 0; - int vertex_offset = 0; - uint16_t *indices; - r_geometry_buffer_lock_t buffer; - - ++g_brush.stat.water_surfaces_drawn; - - prev_time = R_PrevFrame_Time(ent->index); #define MAX_WATER_VERTICES 16 vk_vertex_t poly_vertices[MAX_WATER_VERTICES]; - const qboolean useQuads = FBitSet( warp->flags, SURF_DRAWTURB_QUADS ); + // FIXME unused? const qboolean useQuads = FBitSet( warp->flags, SURF_DRAWTURB_QUADS ); - if( !warp->polys ) return; + ASSERT(args.warp->polys); // set the current waveheight // FIXME VK if( warp->polys->verts[0][2] >= RI.vieworg[2] ) // waveHeight = -ent->curstate.scale; // else - waveHeight = ent->curstate.scale; + // waveHeight = ent->curstate.scale; + const float scale = args.scale; // reset fog color for nonlightmapped water // FIXME VK GL_ResetFogColor(); - // Compute vertex count - for( p = warp->polys; p; p = p->next ) { - const int triangles = p->numverts - 2; - num_vertices += p->numverts; - num_indices += triangles * 3; - } + int vertices = 0; + int indices = 0; - if (!R_GeometryBufferAllocAndLock( &buffer, num_vertices, num_indices, LifetimeSingleFrame )) { - gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for %s\n", ent->model->name ); - return; - } - - g_brush.stat.water_polys_drawn += num_indices / 3; - - indices = buffer.indices.ptr; - - for( p = warp->polys; p; p = p->next ) - { + for( glpoly_t *p = args.warp->polys; p; p = p->next ) { ASSERT(p->numverts <= MAX_WATER_VERTICES); - - if( reverse ) + 
float *v; + if( args.reverse ) v = p->verts[0] + ( p->numverts - 1 ) * VERTEXSIZE; else v = p->verts[0]; - for( i = 0; i < p->numverts; i++ ) + for( int i = 0; i < p->numverts; i++ ) { - if( waveHeight ) + float nv, prev_nv; + if( scale ) { nv = r_turbsin[(int)(time * 160.0f + v[1] + v[0]) & 255] + 8.0f; nv = (r_turbsin[(int)(v[0] * 5.0f + time * 171.0f - v[1]) & 255] + 8.0f ) * 0.8f + nv; - nv = nv * waveHeight + v[2]; + nv = nv * scale + v[2]; - prev_nv = r_turbsin[(int)(prev_time * 160.0f + v[1] + v[0]) & 255] + 8.0f; - prev_nv = (r_turbsin[(int)(v[0] * 5.0f + prev_time * 171.0f - v[1]) & 255] + 8.0f ) * 0.8f + prev_nv; - prev_nv = prev_nv * waveHeight + v[2]; + prev_nv = r_turbsin[(int)(args.prev_time * 160.0f + v[1] + v[0]) & 255] + 8.0f; + prev_nv = (r_turbsin[(int)(v[0] * 5.0f + args.prev_time * 171.0f - v[1]) & 255] + 8.0f ) * 0.8f + prev_nv; + prev_nv = prev_nv * scale + v[2]; } else prev_nv = nv = v[2]; - os = v[3]; - ot = v[4]; + const float os = v[3]; + const float ot = v[4]; - s = os + r_turbsin[(int)((ot * 0.125f + gpGlobals->time) * TURBSCALE) & 255]; + float s = os + r_turbsin[(int)((ot * 0.125f + gpGlobals->time) * TURBSCALE) & 255]; s *= ( 1.0f / SUBDIVIDE_SIZE ); - t = ot + r_turbsin[(int)((os * 0.125f + gpGlobals->time) * TURBSCALE) & 255]; + float t = ot + r_turbsin[(int)((os * 0.125f + gpGlobals->time) * TURBSCALE) & 255]; t *= ( 1.0f / SUBDIVIDE_SIZE ); poly_vertices[i].pos[0] = v[0]; @@ -188,73 +211,59 @@ static void EmitWaterPolys( const cl_entity_t *ent, const msurface_t *warp, qboo Vector4Set(poly_vertices[i].color, 255, 255, 255, 255); -#define WATER_NORMALS poly_vertices[i].normal[0] = 0; poly_vertices[i].normal[1] = 0; -#ifdef WATER_NORMALS poly_vertices[i].normal[2] = 0; -#else - poly_vertices[i].normal[2] = 1; -#endif - // Ray tracing apparently expects triangle list only (although spec is not very clear about this kekw) if (i > 1) { -#ifdef WATER_NORMALS vec3_t e0, e1, normal; VectorSubtract( poly_vertices[i - 1].pos, 
poly_vertices[0].pos, e0 ); VectorSubtract( poly_vertices[i].pos, poly_vertices[0].pos, e1 ); CrossProduct( e1, e0, normal ); - //VectorNormalize(normal); VectorAdd(normal, poly_vertices[0].normal, poly_vertices[0].normal); VectorAdd(normal, poly_vertices[i].normal, poly_vertices[i].normal); VectorAdd(normal, poly_vertices[i - 1].normal, poly_vertices[i - 1].normal); -#endif - *(indices++) = (uint16_t)(vertex_offset); - *(indices++) = (uint16_t)(vertex_offset + i - 1); - *(indices++) = (uint16_t)(vertex_offset + i); + + args.dst_indices[indices++] = (uint16_t)(vertices); + args.dst_indices[indices++] = (uint16_t)(vertices + i - 1); + args.dst_indices[indices++] = (uint16_t)(vertices + i); } - if( reverse ) + if( args.reverse ) v -= VERTEXSIZE; else v += VERTEXSIZE; } -#ifdef WATER_NORMALS - for( i = 0; i < p->numverts; i++ ) { + for( int i = 0; i < p->numverts; i++ ) VectorNormalize(poly_vertices[i].normal); - } -#endif - memcpy(buffer.vertices.ptr + vertex_offset, poly_vertices, sizeof(vk_vertex_t) * p->numverts); - vertex_offset += p->numverts; - } - - R_GeometryBufferUnlock( &buffer ); - - // Render - { - vec3_t emissive; - RT_GetEmissiveForTexture(emissive, warp->texinfo->texture->gl_texturenum); - - const vk_render_geometry_t geometry = { - .texture = warp->texinfo->texture->gl_texturenum, // FIXME assert >= 0 - .material = kXVkMaterialRegular, - .surf_deprecate = warp, - - .max_vertex = num_vertices, - .vertex_offset = buffer.vertices.unit_offset, - - .element_count = num_indices, - .index_offset = buffer.indices.unit_offset, - .emissive = {emissive[0], emissive[1], emissive[2]}, - }; - - VK_RenderModelDynamicAddGeometry( &geometry ); + memcpy(args.dst_vertices + vertices, poly_vertices, sizeof(vk_vertex_t) * p->numverts); + vertices += p->numverts; } // FIXME VK GL_SetupFogColorForSurfaces(); + + // Render + const int tex_id = args.warp->texinfo->texture->gl_texturenum; + *args.dst_geometry = (vk_render_geometry_t){ + .texture = tex_id, // FIXME assert >= 0 
+ .material = kXVkMaterialRegular, + .surf_deprecate = args.warp, + + .max_vertex = vertices, + .element_count = indices, + + .emissive = {0,0,0}, + }; + + RT_GetEmissiveForTexture(args.dst_geometry->emissive, tex_id); + *args.out_vertex_count = vertices; + *args.out_index_count = indices; + + g_brush.stat.water_surfaces_drawn++; + g_brush.stat.water_polys_drawn += indices / 3; } static vk_render_type_e brushRenderModeToRenderType( int render_mode ) { @@ -271,10 +280,10 @@ static vk_render_type_e brushRenderModeToRenderType( int render_mode ) { return kVkRenderTypeSolid; } -static void brushDrawWaterSurfaces( const cl_entity_t *ent, const vec4_t color, const matrix4x4 transform ) -{ +#if 0 // TOO OLD +static void brushDrawWaterSurfaces( const cl_entity_t *ent, const vec4_t color, const matrix4x4 transform ) { const model_t *model = ent->model; - vec3_t mins, maxs; + vec3_t mins, maxs; if( !VectorIsNull( ent->angles )) { @@ -320,6 +329,215 @@ static void brushDrawWaterSurfaces( const cl_entity_t *ent, const vec4_t color, // TODO: // - upload water geometry only once, animate in compute/vertex shader } +#endif + +static void fillWaterSurfaces( const cl_entity_t *ent, vk_brush_model_t *bmodel, vk_render_geometry_t *geometries ) { + ASSERT(bmodel->water.surfaces_count > 0); + + const r_geometry_range_lock_t geom_lock = R_GeometryRangeLock(&bmodel->water.geometry); + + const float scale = ent ? ent->curstate.scale : 1.f; + + int vertices_offset = 0; + int indices_offset = 0; + for (int i = 0; i < bmodel->water.surfaces_count; ++i) { + const int surf_index = bmodel->water.surfaces_indices[i]; + + int vertices = 0, indices = 0; + brushComputeWaterPolys((compute_water_polys_t){ + .prev_time = bmodel->prev_time, + .scale = scale, + .reverse = false, // ??? is it ever true? 
+ .warp = bmodel->engine_model->surfaces + surf_index, + + .dst_vertices = geom_lock.vertices + vertices_offset, + .dst_indices = geom_lock.indices + indices_offset, + .dst_geometry = geometries + i, + + .out_vertex_count = &vertices, + .out_index_count = &indices, + }); + + geometries[i].vertex_offset = bmodel->water.geometry.vertices.unit_offset + vertices_offset; + geometries[i].index_offset = bmodel->water.geometry.indices.unit_offset + indices_offset; + + vertices_offset += vertices; + indices_offset += indices; + + ASSERT(vertices_offset <= bmodel->water.geometry.vertices.count); + ASSERT(indices_offset <= bmodel->water.geometry.indices.count); + } + + R_GeometryRangeUnlock( &geom_lock ); +} + +static rt_light_add_polygon_t loadPolyLight(const model_t *mod, const int surface_index, const msurface_t *surf, const vec3_t emissive); + +static qboolean isSurfaceAnimated( const msurface_t *s, const struct texture_s *base_override ) { + const texture_t *base = base_override ? base_override : s->texinfo->texture; + + /* FIXME don't have ent here, need to check both explicitly + if( ent && ent->curstate.frame ) { + if( base->alternate_anims ) + base = base->alternate_anims; + } + */ + + if( !base->anim_total ) + return false; + + if( base->name[0] == '-' ) + return false; + + return true; +} + +typedef enum { + BrushSurface_Hidden = 0, + BrushSurface_Regular, + BrushSurface_Animated, + BrushSurface_Water, + BrushSurface_Sky, +} brush_surface_type_e; + +static brush_surface_type_e getSurfaceType( const msurface_t *surf, int i ) { +// if ( i >= 0 && (surf->flags & ~(SURF_PLANEBACK | SURF_UNDERWATER | SURF_TRANSPARENT)) != 0) +// { +// DEBUG("\t%d flags: ", i); +// #define PRINTFLAGS(X) \ +// X(SURF_PLANEBACK) \ +// X(SURF_DRAWSKY) \ +// X(SURF_DRAWTURB_QUADS) \ +// X(SURF_DRAWTURB) \ +// X(SURF_DRAWTILED) \ +// X(SURF_CONVEYOR) \ +// X(SURF_UNDERWATER) \ +// X(SURF_TRANSPARENT) + +// #define PRINTFLAG(f) if (FBitSet(surf->flags, f)) DEBUG(" %s", #f); +// 
PRINTFLAGS(PRINTFLAG) +// DEBUG("\n"); +// } + const xvk_patch_surface_t *patch_surface = R_VkPatchGetSurface(i); + if (patch_surface && patch_surface->flags & Patch_Surface_Delete) + return BrushSurface_Hidden; + + if (surf->flags & (SURF_DRAWTURB | SURF_DRAWTURB_QUADS)) { + return (!surf->polys) ? BrushSurface_Hidden : BrushSurface_Water; + } + + // Explicitly enable SURF_SKY, otherwise they will be skipped by SURF_DRAWTILED + if( FBitSet( surf->flags, SURF_DRAWSKY )) + return BrushSurface_Sky; + + //if( surf->flags & ( SURF_DRAWSKY | SURF_DRAWTURB | SURF_CONVEYOR | SURF_DRAWTURB_QUADS ) ) { + if( surf->flags & ( SURF_DRAWTURB | SURF_DRAWTURB_QUADS ) ) { + //if( surf->flags & ( SURF_DRAWSKY | SURF_CONVEYOR ) ) { + // FIXME don't print this on second sort-by-texture pass + //DEBUG("Skipping surface %d because of flags %08x", i, surf->flags); + return BrushSurface_Hidden; + } + + if( FBitSet( surf->flags, SURF_DRAWTILED )) { + //DEBUG("Skipping surface %d because of tiled flag", i); + return BrushSurface_Hidden; + } + + const struct texture_s *texture_override = patch_surface ? 
patch_surface->tex : NULL; + if (isSurfaceAnimated(surf, texture_override)) { + return BrushSurface_Animated; + } + + return BrushSurface_Regular; +} + +static qboolean brushCreateWaterModel(const model_t *mod, vk_brush_model_t *bmodel, const model_sizes_t sizes) { + bmodel->water.surfaces_count = sizes.water_surfaces; + + const r_geometry_range_t geometry = R_GeometryRangeAlloc(sizes.water_vertices, sizes.water_indices); + if (!geometry.block_handle.size) { + ERR("Cannot allocate geometry (v=%d, i=%d) for water model %s", + sizes.water_vertices, sizes.water_indices, mod->name ); + return false; + } + + vk_render_geometry_t *const geometries = Mem_Malloc(vk_core.pool, sizeof(vk_render_geometry_t) * sizes.water_surfaces); + + int* const surfaces_indices = Mem_Malloc(vk_core.pool, sizes.water_surfaces * sizeof(int)); + int index_index = 0; + for( int i = 0; i < mod->nummodelsurfaces; ++i) { + const int surface_index = mod->firstmodelsurface + i; + const msurface_t *surf = mod->surfaces + surface_index; + + if (getSurfaceType(surf, surface_index) != BrushSurface_Water) + continue; + + surfaces_indices[index_index++] = surface_index; + } + ASSERT(index_index == sizes.water_surfaces); + + bmodel->water.surfaces_indices = surfaces_indices; + bmodel->water.geometry = geometry; + fillWaterSurfaces(NULL, bmodel, geometries); + + if (!R_RenderModelCreate(&bmodel->water.render_model, (vk_render_model_init_t){ + .name = mod->name, + .geometries = geometries, + .geometries_count = sizes.water_surfaces, + .dynamic = true, + })) { + ERR("Could not create water render model for brush model %s", mod->name); + return false; + } + + bmodel->water.surfaces_indices = surfaces_indices; + return true; +} + +static void brushDrawWater(vk_brush_model_t *bmodel, const cl_entity_t *ent, int render_type, const vec4_t color, const matrix4x4 transform) { + ASSERT(bmodel->water.surfaces_count > 0); + + fillWaterSurfaces(NULL, bmodel, bmodel->water.render_model.geometries); + if 
(!R_RenderModelUpdate(&bmodel->water.render_model)) { + ERR("Failed to update brush model \"%s\" water", bmodel->render_model.debug_name); + } + + R_RenderModelDraw(&bmodel->water.render_model, (r_model_draw_t){ + .render_type = render_type, + .color = (const vec4_t*)color, + .transform = (const matrix4x4*)transform, + .prev_transform = &bmodel->prev_transform, + }); +} + +// FIXME use this +static void computeConveyorSpeed(const color24 rendercolor, int tex_index, vec2_t speed) { + float sy, cy; + float flConveyorSpeed = 0.0f; + float flRate, flAngle; + vk_texture_t *texture = findTexture( tex_index ); + //gl_texture_t *texture; + + // FIXME + /* if( ENGINE_GET_PARM( PARM_QUAKE_COMPATIBLE ) && RI.currententity == gEngfuncs.GetEntityByIndex( 0 ) ) */ + /* { */ + /* // same as doom speed */ + /* flConveyorSpeed = -35.0f; */ + /* } */ + /* else */ + { + flConveyorSpeed = (rendercolor.g<<8|rendercolor.b) / 16.0f; + if( rendercolor.r ) flConveyorSpeed = -flConveyorSpeed; + } + //texture = R_GetTexture( glState.currentTextures[glState.activeTMU] ); + + flRate = fabs( flConveyorSpeed ) / (float)texture->width; + flAngle = ( flConveyorSpeed >= 0 ) ? 180 : 0; + + SinCos( flAngle * ( M_PI_F / 180.0f ), &sy, &cy ); + speed[0] = cy * flRate; + speed[1] = sy * flRate; +} /* =============== @@ -374,75 +592,57 @@ const texture_t *R_TextureAnimation( const cl_entity_t *ent, const msurface_t *s return base; } -static qboolean isSurfaceAnimated( const msurface_t *s, const struct texture_s *base_override ) { - const texture_t *base = base_override ? 
base_override : s->texinfo->texture; - - /* FIXME don't have ent here, need to check both explicitly - if( ent && ent->curstate.frame ) { - if( base->alternate_anims ) - base = base->alternate_anims; - } - */ - - if( !base->anim_total ) - return false; - - if( base->name[0] == '-' ) - return false; - - return true; -} - -void VK_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, const matrix4x4 transform ) { +void VK_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, const matrix4x4 in_transform ) { // Expect all buffers to be bound const model_t *mod = ent->model; vk_brush_model_t *bmodel = mod->cache.data; if (!bmodel) { - gEngine.Con_Printf( S_ERROR "Model %s wasn't loaded\n", mod->name); + ERR("Model %s wasn't loaded", mod->name); return; } - if (transform) - Matrix4x4_Copy(bmodel->render_model.transform, transform); + matrix4x4 transform; + if (in_transform) + Matrix4x4_Copy(transform, in_transform); else - Matrix4x4_LoadIdentity(bmodel->render_model.transform); + Matrix4x4_LoadIdentity(transform); - Vector4Set(bmodel->render_model.color, 1.f, 1.f, 1.f, 1.f); + vec4_t color = {1, 1, 1, 1}; vk_render_type_e render_type = kVkRenderTypeSolid; switch (render_mode) { case kRenderNormal: - Vector4Set(bmodel->render_model.color, 1.f, 1.f, 1.f, 1.f); + Vector4Set(color, 1.f, 1.f, 1.f, 1.f); render_type = kVkRenderTypeSolid; break; case kRenderTransColor: render_type = kVkRenderType_A_1mA_RW; - Vector4Set(bmodel->render_model.color, + Vector4Set(color, ent->curstate.rendercolor.r / 255.f, ent->curstate.rendercolor.g / 255.f, ent->curstate.rendercolor.b / 255.f, blend); break; case kRenderTransAdd: - Vector4Set(bmodel->render_model.color, blend, blend, blend, 1.f); + Vector4Set(color, blend, blend, blend, 1.f); render_type = kVkRenderType_A_1_R; break; case kRenderTransAlpha: if( gEngine.EngineGetParm( PARM_QUAKE_COMPATIBLE, 0 )) { render_type = kVkRenderType_A_1mA_RW; - Vector4Set(bmodel->render_model.color, 1.f, 1.f, 1.f, blend); 
+ Vector4Set(color, 1.f, 1.f, 1.f, blend); } else { - Vector4Set(bmodel->render_model.color, 1.f, 1.f, 1.f, 1.f); + Vector4Set(color, 1.f, 1.f, 1.f, 1.f); render_type = kVkRenderType_AT; } break; case kRenderTransTexture: case kRenderGlow: render_type = kVkRenderType_A_1mA_R; - Vector4Set(bmodel->render_model.color, 1.f, 1.f, 1.f, blend); + Vector4Set(color, 1.f, 1.f, 1.f, blend); break; } @@ -450,15 +650,14 @@ void VK_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, co // TODO: on big maps more than a single lightmap texture is possible bmodel->render_model.lightmap = (render_mode == kRenderNormal || render_mode == kRenderTransAlpha) ? 1 : 0; - if (bmodel->num_water_surfaces) { - brushDrawWaterSurfaces(ent, bmodel->render_model.color, bmodel->render_model.transform); - } + if (bmodel->water.surfaces_count) + brushDrawWater(bmodel, ent, render_type, color, transform); + + ++g_brush.stat.models_drawn; if (bmodel->render_model.num_geometries == 0) return; - ++g_brush.stat.models_drawn; - // TransColor means ignore textures and draw just color if (render_mode == kRenderTransColor) { // TODO cache previous render_mode. @@ -469,88 +668,53 @@ void VK_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, co } } else { // Update animated textures + int updated_textures_count = 0; for (int i = 0; i < bmodel->animated_indexes_count; ++i) { - vk_render_geometry_t *geom = bmodel->render_model.geometries + bmodel->animated_indexes[i]; + const int geom_index = bmodel->animated_indexes[i]; + vk_render_geometry_t *geom = bmodel->render_model.geometries + geom_index; const int surface_index = geom->surf_deprecate - mod->surfaces; const xvk_patch_surface_t *const patch_surface = R_VkPatchGetSurface(surface_index); // Optionally patch by texture_s pointer and run animations const struct texture_s *texture_override = patch_surface ? 
patch_surface->tex : NULL; const texture_t *t = R_TextureAnimation(ent, geom->surf_deprecate, texture_override); - if (t->gl_texturenum >= 0) + const int new_texture = t->gl_texturenum; + + if (new_texture >= 0 && new_texture != geom->texture) { geom->texture = t->gl_texturenum; + if (updated_textures_count < MAX_ANIMATED_TEXTURES) { + g_brush.updated_textures[updated_textures_count++] = bmodel->animated_indexes[i]; + } + } + + // Animated textures can be emissive + // Add them as dynamic lights for now. It would probably be better if they were static lights (for worldmodel), + // but there's no easy way to do it for now. + vec3_t *emissive = &bmodel->render_model.geometries[geom_index].emissive; + if (RT_GetEmissiveForTexture(*emissive, geom->texture)) { + rt_light_add_polygon_t polylight = loadPolyLight(mod, surface_index, geom->surf_deprecate, *emissive); + polylight.dynamic = true; + polylight.transform_row = (const matrix3x4*)&transform; + RT_LightAddPolygon(&polylight); + } + } + + if (updated_textures_count > 0) { + R_RenderModelUpdateMaterials(&bmodel->render_model, g_brush.updated_textures, updated_textures_count); } } - bmodel->render_model.render_type = render_type; - VK_RenderModelDraw(ent, &bmodel->render_model); + R_RenderModelDraw(&bmodel->render_model, (r_model_draw_t){ + .render_type = render_type, + .color = &color, + .transform = &transform, + .prev_transform = &bmodel->prev_transform, + }); + + Matrix4x4_Copy(bmodel->prev_transform, transform); + bmodel->prev_time = gpGlobals->time; } -typedef enum { - BrushSurface_Hidden = 0, - BrushSurface_Regular, - BrushSurface_Animated, - BrushSurface_Water, - BrushSurface_Sky, -} brush_surface_type_e; - -static brush_surface_type_e getSurfaceType( const msurface_t *surf, int i ) { -// if ( i >= 0 && (surf->flags & ~(SURF_PLANEBACK | SURF_UNDERWATER | SURF_TRANSPARENT)) != 0) -// { -// gEngine.Con_Reportf("\t%d flags: ", i); -// #define PRINTFLAGS(X) \ -// X(SURF_PLANEBACK) \ -// X(SURF_DRAWSKY) \ -// 
X(SURF_DRAWTURB_QUADS) \ -// X(SURF_DRAWTURB) \ -// X(SURF_DRAWTILED) \ -// X(SURF_CONVEYOR) \ -// X(SURF_UNDERWATER) \ -// X(SURF_TRANSPARENT) - -// #define PRINTFLAG(f) if (FBitSet(surf->flags, f)) gEngine.Con_Reportf(" %s", #f); -// PRINTFLAGS(PRINTFLAG) -// gEngine.Con_Reportf("\n"); -// } - const xvk_patch_surface_t *patch_surface = R_VkPatchGetSurface(i); - if (patch_surface && patch_surface->flags & Patch_Surface_Delete) - return BrushSurface_Hidden; - - if (surf->flags & (SURF_DRAWTURB | SURF_DRAWTURB_QUADS)) - return BrushSurface_Water; - - // Explicitly enable SURF_SKY, otherwise they will be skipped by SURF_DRAWTILED - if( FBitSet( surf->flags, SURF_DRAWSKY )) - return BrushSurface_Sky; - - //if( surf->flags & ( SURF_DRAWSKY | SURF_DRAWTURB | SURF_CONVEYOR | SURF_DRAWTURB_QUADS ) ) { - if( surf->flags & ( SURF_DRAWTURB | SURF_DRAWTURB_QUADS ) ) { - //if( surf->flags & ( SURF_DRAWSKY | SURF_CONVEYOR ) ) { - // FIXME don't print this on second sort-by-texture pass - //gEngine.Con_Reportf("Skipping surface %d because of flags %08x\n", i, surf->flags); - return BrushSurface_Hidden; - } - - if( FBitSet( surf->flags, SURF_DRAWTILED )) { - //gEngine.Con_Reportf("Skipping surface %d because of tiled flag\n", i); - return BrushSurface_Hidden; - } - - const struct texture_s *texture_override = patch_surface ? 
patch_surface->tex : NULL; - if (isSurfaceAnimated(surf, texture_override)) { - return BrushSurface_Animated; - } - - return BrushSurface_Regular; -} - -typedef struct { - int num_surfaces, num_vertices, num_indices; - int max_texture_id; - int water_surfaces; - int animated_count; -} model_sizes_t; - static model_sizes_t computeSizes( const model_t *mod ) { model_sizes_t sizes = {0}; @@ -566,6 +730,7 @@ static model_sizes_t computeSizes( const model_t *mod ) { switch (getSurfaceType(surf, surface_index)) { case BrushSurface_Water: sizes.water_surfaces++; + addWarpVertIndCounts(surf, &sizes.water_vertices, &sizes.water_indices); case BrushSurface_Hidden: continue; @@ -581,39 +746,40 @@ static model_sizes_t computeSizes( const model_t *mod ) { sizes.num_indices += 3 * (surf->numedges - 1); } + DEBUG("Computed sizes for brush model \"%s\": num_surfaces=%d num_vertices=%d num_indices=%d max_texture_id=%d water_surfaces=%d animated_count=%d water_vertices=%d water_indices=%d", mod->name, sizes.num_surfaces, sizes.num_vertices, sizes.num_indices, sizes.max_texture_id, sizes.water_surfaces, sizes.animated_count, sizes.water_vertices, sizes.water_indices); + return sizes; } -static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { - vk_brush_model_t *bmodel = mod->cache.data; - uint32_t vertex_offset = 0; +typedef struct { + const model_t *mod; + const vk_brush_model_t *bmodel; + model_sizes_t sizes; + uint32_t base_vertex_offset; + uint32_t base_index_offset; + + vk_render_geometry_t *out_geometries; + vk_vertex_t *out_vertices; + uint16_t *out_indices; +} fill_geometries_args_t; + +static qboolean fillBrushSurfaces(fill_geometries_args_t args) { + int vertex_offset = 0; int num_geometries = 0; - vk_vertex_t *bvert = NULL; - uint16_t *bind = NULL; - uint32_t index_offset = 0; - r_geometry_buffer_lock_t buffer; int animated_count = 0; - if (!R_GeometryBufferAllocAndLock( &buffer, sizes.num_vertices, sizes.num_indices, LifetimeLong )) { - 
gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for %s\n", mod->name ); - return false; - } - - bvert = buffer.vertices.ptr; - bind = buffer.indices.ptr; - - index_offset = buffer.indices.unit_offset; + vk_vertex_t *p_vert = args.out_vertices; + uint16_t *p_ind = args.out_indices; + int index_offset = args.base_index_offset; // Load sorted by gl_texturenum // TODO this does not make that much sense in vulkan (can sort later) - for (int t = 0; t <= sizes.max_texture_id; ++t) - { - for( int i = 0; i < mod->nummodelsurfaces; ++i) - { - const int surface_index = mod->firstmodelsurface + i; - msurface_t *surf = mod->surfaces + surface_index; - mextrasurf_t *info = surf->info; - vk_render_geometry_t *model_geometry = bmodel->render_model.geometries + num_geometries; + for (int t = 0; t <= args.sizes.max_texture_id; ++t) { + for( int i = 0; i < args.mod->nummodelsurfaces; ++i) { + const int surface_index = args.mod->firstmodelsurface + i; + msurface_t *surf = args.mod->surfaces + surface_index; + mextrasurf_t *info = surf->info; + vk_render_geometry_t *model_geometry = args.out_geometries + num_geometries; const float sample_size = gEngine.Mod_SampleSizeForFace( surf ); int index_count = 0; vec3_t tangent; @@ -632,22 +798,21 @@ static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { case BrushSurface_Hidden: continue; case BrushSurface_Animated: - bmodel->animated_indexes[animated_count++] = num_geometries; + args.bmodel->animated_indexes[animated_count++] = num_geometries; case BrushSurface_Regular: case BrushSurface_Sky: break; } - bmodel->surface_to_geometry_index[i] = num_geometries; + args.bmodel->surface_to_geometry_index[i] = num_geometries; ++num_geometries; - //gEngine.Con_Reportf( "surface %d: numverts=%d numedges=%d\n", i, surf->polys ? surf->polys->numverts : -1, surf->numedges ); + //DEBUG( "surface %d: numverts=%d numedges=%d", i, surf->polys ? 
surf->polys->numverts : -1, surf->numedges ); - if (vertex_offset + surf->numedges >= UINT16_MAX) - { - gEngine.Con_Printf(S_ERROR "Model %s indices don't fit into 16 bits\n", mod->name); - // FIXME unlock and free buffers + if (vertex_offset + surf->numedges >= UINT16_MAX) { + // We might be able to handle it by adjusting base_vertex_offset, etc + ERR("Model %s indices don't fit into 16 bits", args.mod->name); return false; } @@ -656,7 +821,7 @@ static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { model_geometry->surf_deprecate = surf; model_geometry->texture = tex_id; - model_geometry->vertex_offset = buffer.vertices.unit_offset; + model_geometry->vertex_offset = args.base_vertex_offset; model_geometry->max_vertex = vertex_offset + surf->numedges; model_geometry->index_offset = index_offset; @@ -666,7 +831,7 @@ static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { } else { model_geometry->material = kXVkMaterialRegular; ASSERT(!FBitSet( surf->flags, SURF_DRAWTILED )); - VK_CreateSurfaceLightmap( surf, mod ); + VK_CreateSurfaceLightmap( surf, args.mod ); } if (FBitSet( surf->flags, SURF_CONVEYOR )) { @@ -678,9 +843,9 @@ static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { for( int k = 0; k < surf->numedges; k++ ) { - const int iedge = mod->surfedges[surf->firstedge + k]; - const medge_t *edge = mod->edges + (iedge >= 0 ? iedge : -iedge); - const mvertex_t *in_vertex = mod->vertexes + (iedge >= 0 ? edge->v[0] : edge->v[1]); + const int iedge = args.mod->surfedges[surf->firstedge + k]; + const medge_t *edge = args.mod->edges + (iedge >= 0 ? iedge : -iedge); + const mvertex_t *in_vertex = args.mod->vertexes + (iedge >= 0 ? 
edge->v[0] : edge->v[1]); vk_vertex_t vertex = { {in_vertex->position[0], in_vertex->position[1], in_vertex->position[2]}, }; @@ -745,13 +910,13 @@ static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { Vector4Set(vertex.color, 255, 255, 255, 255); - *(bvert++) = vertex; + *(p_vert++) = vertex; // Ray tracing apparently expects triangle list only (although spec is not very clear about this kekw) if (k > 1) { - *(bind++) = (uint16_t)(vertex_offset + 0); - *(bind++) = (uint16_t)(vertex_offset + k - 1); - *(bind++) = (uint16_t)(vertex_offset + k); + *(p_ind++) = (uint16_t)(vertex_offset + 0); + *(p_ind++) = (uint16_t)(vertex_offset + k - 1); + *(p_ind++) = (uint16_t)(vertex_offset + k); index_count += 3; index_offset += 3; } @@ -762,15 +927,8 @@ static qboolean loadBrushSurfaces( model_sizes_t sizes, const model_t *mod ) { } } - R_GeometryBufferUnlock( &buffer ); - - bmodel->render_model.dynamic_polylights = NULL; - bmodel->render_model.dynamic_polylights_count = 0; - - ASSERT(sizes.num_surfaces == num_geometries); - ASSERT(sizes.animated_count == animated_count); - bmodel->render_model.num_geometries = num_geometries; - + ASSERT(args.sizes.num_surfaces == num_geometries); + ASSERT(args.sizes.animated_count == animated_count); return true; } @@ -785,73 +943,144 @@ static const xvk_mapent_func_wall_t *getModelFuncWallPatch( const model_t *const return NULL; } -qboolean VK_BrushModelLoad( model_t *mod ) { - if (mod->cache.data) { - gEngine.Con_Reportf( S_WARN "Model %s was already loaded\n", mod->name ); - return true; +static qboolean createRenderModel( const model_t *mod, vk_brush_model_t *bmodel, const model_sizes_t sizes ) { + bmodel->geometry = R_GeometryRangeAlloc(sizes.num_vertices, sizes.num_indices); + if (!bmodel->geometry.block_handle.size) { + ERR("Cannot allocate geometry for %s", mod->name ); + return false; } - gEngine.Con_Reportf("%s: %s flags=%08x\n", __FUNCTION__, mod->name, mod->flags); + vk_render_geometry_t *const geometries 
= Mem_Malloc(vk_core.pool, sizeof(vk_render_geometry_t) * sizes.num_surfaces); + bmodel->surface_to_geometry_index = Mem_Malloc(vk_core.pool, sizeof(int) * mod->nummodelsurfaces); + bmodel->animated_indexes = Mem_Malloc(vk_core.pool, sizeof(int) * sizes.animated_count); + bmodel->animated_indexes_count = sizes.animated_count; - { - const model_sizes_t sizes = computeSizes( mod ); - const size_t model_size = sizeof(vk_brush_model_t); + if (sizes.animated_count > MAX_ANIMATED_TEXTURES) { + WARN("Too many animated textures %d for model \"%s\" some surfaces can be static", sizes.animated_count, mod->name); + } - vk_brush_model_t *bmodel = Mem_Calloc(vk_core.pool, sizeof(*bmodel)); - mod->cache.data = bmodel; - Q_strncpy(bmodel->render_model.debug_name, mod->name, sizeof(bmodel->render_model.debug_name)); - bmodel->render_model.render_type = kVkRenderTypeSolid; + const r_geometry_range_lock_t geom_lock = R_GeometryRangeLock(&bmodel->geometry); - bmodel->num_water_surfaces = sizes.water_surfaces; - Vector4Set(bmodel->render_model.color, 1, 1, 1, 1); + const qboolean fill_result = fillBrushSurfaces((fill_geometries_args_t){ + .mod = mod, + .bmodel = bmodel, + .sizes = sizes, + .base_vertex_offset = bmodel->geometry.vertices.unit_offset, + .base_index_offset = bmodel->geometry.indices.unit_offset, + .out_geometries = geometries, + .out_vertices = geom_lock.vertices, + .out_indices = geom_lock.indices, + }); - if (sizes.num_surfaces != 0) { - bmodel->render_model.geometries = Mem_Malloc(vk_core.pool, sizeof(vk_render_geometry_t) * sizes.num_surfaces); - bmodel->surface_to_geometry_index = Mem_Malloc(vk_core.pool, sizeof(int) * mod->nummodelsurfaces); - bmodel->animated_indexes = Mem_Malloc(vk_core.pool, sizeof(int) * sizes.animated_count); - bmodel->animated_indexes_count = sizes.animated_count; + R_GeometryRangeUnlock( &geom_lock ); - bmodel->render_model.geometries_changed = bmodel->animated_indexes; - bmodel->render_model.geometries_changed_count = 
bmodel->animated_indexes_count; + if (!fill_result) { + // TODO unlock and free buffers if failed? Currently we can't free geometry range, as it is being implicitly referenced by staging queue. Flush staging and free? + // This shouldn't really happen btw, kind of unrecoverable for now tbh. + // Also, we might just handle it, as the only reason it can fail is 16 bit index overflow. + // I. Split into smaller geometries sets. + // II. Make indices 32 bit + return false; + } - if (!loadBrushSurfaces(sizes, mod) || !VK_RenderModelInit(&bmodel->render_model)) { - gEngine.Con_Printf(S_ERROR "Could not load model %s\n", mod->name); - Mem_Free(bmodel); - return false; - } - } - - g_brush.stat.num_indices += sizes.num_indices; - g_brush.stat.num_vertices += sizes.num_vertices; - - gEngine.Con_Reportf("Model %s loaded surfaces: %d (of %d); total vertices: %u, total indices: %u\n", mod->name, bmodel->render_model.num_geometries, mod->nummodelsurfaces, g_brush.stat.num_vertices, g_brush.stat.num_indices); + if (!R_RenderModelCreate(&bmodel->render_model, (vk_render_model_init_t){ + .name = mod->name, + .geometries = geometries, + .geometries_count = sizes.num_surfaces, + .dynamic = false, + })) { + ERR("Could not create render model for brush model %s", mod->name); + return false; } return true; } -void VK_BrushModelDestroy( model_t *mod ) { - vk_brush_model_t *bmodel = mod->cache.data; - ASSERT(mod->type == mod_brush); - if (!bmodel) - return; +qboolean VK_BrushModelLoad( model_t *mod ) { + if (mod->cache.data) { + WARN("Model %s was already loaded", mod->name ); + return true; + } - VK_RenderModelDestroy(&bmodel->render_model); - if (bmodel->animated_indexes) - Mem_Free(bmodel->animated_indexes); - if (bmodel->surface_to_geometry_index) - Mem_Free(bmodel->surface_to_geometry_index); - if (bmodel->render_model.geometries) - Mem_Free(bmodel->render_model.geometries); - Mem_Free(bmodel); - mod->cache.data = NULL; + DEBUG("%s: %s flags=%08x", __FUNCTION__, mod->name, 
mod->flags); + + vk_brush_model_t *bmodel = Mem_Calloc(vk_core.pool, sizeof(*bmodel)); + ASSERT(g_brush.models_count < COUNTOF(g_brush.models)); + g_brush.models[g_brush.models_count++] = bmodel; + + bmodel->engine_model = mod; + mod->cache.data = bmodel; + + Matrix4x4_LoadIdentity(bmodel->prev_transform); + bmodel->prev_time = gpGlobals->time; + + const model_sizes_t sizes = computeSizes( mod ); + + if (sizes.num_surfaces != 0) { + if (!createRenderModel(mod, bmodel, sizes)) { + ERR("Could not load brush model %s", mod->name); + // FIXME Cannot deallocate bmodel as we might still have staging references to its memory + return false; + } + } + + if (sizes.water_surfaces) { + if (!brushCreateWaterModel(mod, bmodel, sizes)) { + ERR("Could not load brush water model %s", mod->name); + // FIXME Cannot deallocate bmodel as we might still have staging references to its memory + return false; + } + } + + g_brush.stat.total_vertices += sizes.num_vertices + sizes.water_vertices; + g_brush.stat.total_indices += sizes.num_indices + sizes.water_indices; + + DEBUG("Model %s loaded surfaces: %d (of %d); total vertices: %u, total indices: %u", + mod->name, bmodel->render_model.num_geometries, mod->nummodelsurfaces, g_brush.stat.total_vertices, g_brush.stat.total_indices); + + return true; } -void VK_BrushStatsClear( void ) -{ - // Free previous map data - g_brush.stat.num_vertices = 0; - g_brush.stat.num_indices = 0; +static void VK_BrushModelDestroy( vk_brush_model_t *bmodel ) { + ASSERT(bmodel->engine_model); + + DEBUG("%s: %s", __FUNCTION__, bmodel->engine_model->name); + + ASSERT(bmodel->engine_model->cache.data == bmodel); + ASSERT(bmodel->engine_model->type == mod_brush); + + if (bmodel->water.surfaces_count) { + R_RenderModelDestroy(&bmodel->water.render_model); + Mem_Free((int*)bmodel->water.surfaces_indices); + Mem_Free(bmodel->water.render_model.geometries); + R_GeometryRangeFree(&bmodel->water.geometry); + } + + R_RenderModelDestroy(&bmodel->render_model); + + if 
(bmodel->animated_indexes) + Mem_Free(bmodel->animated_indexes); + + if (bmodel->surface_to_geometry_index) + Mem_Free(bmodel->surface_to_geometry_index); + + if (bmodel->render_model.geometries) { + Mem_Free(bmodel->render_model.geometries); + R_GeometryRangeFree(&bmodel->geometry); + } + + bmodel->engine_model->cache.data = NULL; + Mem_Free(bmodel); +} + +void VK_BrushModelDestroyAll( void ) { + DEBUG("Destroying %d brush models", g_brush.models_count); + for( int i = 0; i < g_brush.models_count; i++ ) + VK_BrushModelDestroy(g_brush.models[i]); + + g_brush.stat.total_vertices = 0; + g_brush.stat.total_indices = 0; + g_brush.models_count = 0; } static rt_light_add_polygon_t loadPolyLight(const model_t *mod, const int surface_index, const msurface_t *surf, const vec3_t emissive) { @@ -860,7 +1089,7 @@ static rt_light_add_polygon_t loadPolyLight(const model_t *mod, const int surfac // TODO split, don't clip if (surf->numedges > 7) - gEngine.Con_Printf(S_WARN "emissive surface %d has %d vertices; clipping to 7\n", surface_index, surf->numedges); + WARN("emissive surface %d has %d vertices; clipping to 7", surface_index, surf->numedges); VectorCopy(emissive, lpoly.emissive); @@ -889,6 +1118,7 @@ void R_VkBrushModelCollectEmissiveSurfaces( const struct model_s *mod, qboolean vec3_t emissive; } emissive_surface_t; emissive_surface_t emissive_surfaces[MAX_SURFACE_LIGHTS]; + int geom_indices[MAX_SURFACE_LIGHTS]; int emissive_surfaces_count = 0; // Load list of all emissive surfaces @@ -917,10 +1147,10 @@ void R_VkBrushModelCollectEmissiveSurfaces( const struct model_s *mod, qboolean continue; } - //gEngine.Con_Reportf("%d: i=%d surf_index=%d patch=%d(%#x) => emissive=(%f,%f,%f)\n", emissive_surfaces_count, i, surface_index, !!psurf, psurf?psurf->flags:0, emissive[0], emissive[1], emissive[2]); + DEBUG("%d: i=%d surf_index=%d tex_id=%d patch=%d(%#x) => emissive=(%f,%f,%f)", emissive_surfaces_count, i, surface_index, tex_id, !!psurf, psurf?psurf->flags:0, emissive[0], 
emissive[1], emissive[2]); if (emissive_surfaces_count == MAX_SURFACE_LIGHTS) { - gEngine.Con_Printf(S_ERROR "Too many emissive surfaces for model %s: max=%d\n", mod->name, MAX_SURFACE_LIGHTS); + ERR("Too many emissive surfaces for model %s: max=%d", mod->name, MAX_SURFACE_LIGHTS); break; } @@ -971,8 +1201,17 @@ void R_VkBrushModelCollectEmissiveSurfaces( const struct model_s *mod, qboolean } // Assign the emissive value to the right geometry - VectorCopy(polylight.emissive, bmodel->render_model.geometries[bmodel->surface_to_geometry_index[s->model_surface_index]].emissive); + const int geom_index = bmodel->surface_to_geometry_index[s->model_surface_index]; + geom_indices[i] = geom_index; + VectorCopy(polylight.emissive, bmodel->render_model.geometries[geom_index].emissive); } - gEngine.Con_Reportf("Loaded %d polylights for %s model %s\n", emissive_surfaces_count, is_static ? "static" : "movable", mod->name); + if (emissive_surfaces_count > 0) { + // Update emissive values in kusochki. This is required because initial VK_BrushModelLoad happens before we've read + // RAD data in vk_light.c, so the emissive values are empty. This is the place and time where we actually get to + // know them, so let's fixup things. + // TODO minor optimization: sort geom_indices to have a better chance for them to be sequential + R_RenderModelUpdateMaterials(&bmodel->render_model, geom_indices, emissive_surfaces_count); + INFO("Loaded %d polylights for %s model %s", emissive_surfaces_count, is_static ? 
"static" : "movable", mod->name); + } } diff --git a/ref/vk/vk_brush.h b/ref/vk/vk_brush.h index e55565a5..f128c61b 100644 --- a/ref/vk/vk_brush.h +++ b/ref/vk/vk_brush.h @@ -12,10 +12,9 @@ qboolean VK_BrushInit( void ); void VK_BrushShutdown( void ); qboolean VK_BrushModelLoad(struct model_s *mod); -void VK_BrushModelDestroy(struct model_s *mod); +void VK_BrushModelDestroyAll( void ); void VK_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, const matrix4x4 model ); -void VK_BrushStatsClear( void ); const texture_t *R_TextureAnimation( const cl_entity_t *ent, const msurface_t *s, const struct texture_s *base_override ); diff --git a/ref/vk/vk_buffer.h b/ref/vk/vk_buffer.h index a9a54683..5e977fe0 100644 --- a/ref/vk/vk_buffer.h +++ b/ref/vk/vk_buffer.h @@ -2,6 +2,7 @@ #include "vk_core.h" #include "vk_devmem.h" +#include "r_flipping.h" #include "alolcator.h" typedef struct vk_buffer_s { @@ -17,18 +18,6 @@ void VK_BufferDestroy(vk_buffer_t *buf); VkDeviceAddress R_VkBufferGetDeviceAddress(VkBuffer buffer); - -typedef struct { - alo_ring_t ring; - uint32_t frame_offsets[2]; -} r_flipping_buffer_t; - -void R_FlippingBuffer_Init(r_flipping_buffer_t *flibuf, uint32_t size); -void R_FlippingBuffer_Clear(r_flipping_buffer_t *flibuf); -uint32_t R_FlippingBuffer_Alloc(r_flipping_buffer_t* flibuf, uint32_t size, uint32_t align); -void R_FlippingBuffer_Flip(r_flipping_buffer_t* flibuf); - - typedef struct { r_flipping_buffer_t dynamic; uint32_t static_size; diff --git a/ref/vk/vk_common.h b/ref/vk/vk_common.h index e602047b..9c4ad2e7 100644 --- a/ref/vk/vk_common.h +++ b/ref/vk/vk_common.h @@ -56,5 +56,12 @@ inline static int clampi32(int v, int min, int max) { return v; } +typedef struct { + const char *s; + int len; +} const_string_view_t; + +int stringViewCmp(const_string_view_t sv, const char* s); + extern ref_api_t gEngine; extern ref_globals_t *gpGlobals; diff --git a/ref/vk/vk_core.c b/ref/vk/vk_core.c index 779cea65..336c42a5 100644 --- 
a/ref/vk/vk_core.c +++ b/ref/vk/vk_core.c @@ -22,6 +22,8 @@ #include "vk_sprite.h" #include "vk_beams.h" #include "vk_combuf.h" +#include "vk_entity_data.h" +#include "vk_logs.h" // FIXME move this rt-specific stuff out #include "vk_light.h" @@ -34,6 +36,8 @@ #include "com_strings.h" #include "eiface.h" +#include "debugbreak.h" + #include #include @@ -127,7 +131,7 @@ VkBool32 VKAPI_PTR debugCallback( #ifdef _MSC_VER __debugbreak(); #else - __builtin_trap(); + debug_break(); #endif } return VK_FALSE; @@ -695,7 +699,9 @@ qboolean R_VkInit( void ) vk_core.validate = !!gEngine.Sys_CheckParm("-vkvalidate"); vk_core.debug = vk_core.validate || !!(gEngine.Sys_CheckParm("-vkdebug") || gEngine.Sys_CheckParm("-gldebug")); vk_core.rtx = false; + VK_LoadCvars(); + VK_LogsReadCvar(); R_SpeedsInit(); @@ -808,6 +814,10 @@ qboolean R_VkInit( void ) void R_VkShutdown( void ) { XVK_CHECK(vkDeviceWaitIdle(vk_core.device)); + VK_EntityDataClear(); + + R_SpriteShutdown(); + if (vk_core.rtx) { VK_LightsShutdown(); @@ -830,7 +840,7 @@ void R_VkShutdown( void ) { VK_DescriptorShutdown(); R_VkStagingShutdown(); - + R_VkCombuf_Destroy(); VK_DevMemDestroy(); diff --git a/ref/vk/vk_core.h b/ref/vk/vk_core.h index be3f496f..55112fa0 100644 --- a/ref/vk/vk_core.h +++ b/ref/vk/vk_core.h @@ -203,6 +203,7 @@ do { \ X(vkQueueSubmit) \ X(vkQueuePresentKHR) \ X(vkWaitForFences) \ + X(vkWaitSemaphores) \ X(vkResetFences) \ X(vkCreateSemaphore) \ X(vkDestroySemaphore) \ diff --git a/ref/vk/vk_cvar.c b/ref/vk/vk_cvar.c index 71b43943..92ed8768 100644 --- a/ref/vk/vk_cvar.c +++ b/ref/vk/vk_cvar.c @@ -19,7 +19,10 @@ void VK_LoadCvars( void ) ui_infotool = gEngine.Cvar_Get( "ui_infotool", "0", FCVAR_CHEAT, "DEBUG: print entity info under crosshair" ); vk_only = gEngine.Cvar_Get( "vk_only", "0", FCVAR_GLCONFIG, "Full disable Ray Tracing pipeline" ); vk_device_target_id = gEngine.Cvar_Get( "vk_device_target_id", "", FCVAR_GLCONFIG, "Selected video device id" ); + + vk_debug_log = 
gEngine.Cvar_Get("vk_debug_log", "", FCVAR_GLCONFIG, "List of modules to enable debug logs for"); } + void VK_LoadCvarsAfterInit( void ) { vk_rtx_extension = gEngine.Cvar_Get( "vk_rtx_extension", vk_core.rtx ? "1" : "0", FCVAR_READ_ONLY, "" ); diff --git a/ref/vk/vk_cvar.h b/ref/vk/vk_cvar.h index 5ac4c08b..130aba45 100644 --- a/ref/vk/vk_cvar.h +++ b/ref/vk/vk_cvar.h @@ -27,6 +27,7 @@ void VK_LoadCvarsAfterInit( void ); X(vk_rtx_extension) \ X(vk_only) \ X(vk_device_target_id) \ + X(vk_debug_log) \ #define EXTERN_CVAR(cvar) extern cvar_t *cvar; DECLARE_CVAR(EXTERN_CVAR) diff --git a/ref/vk/vk_entity_data.c b/ref/vk/vk_entity_data.c new file mode 100644 index 00000000..27ef86e6 --- /dev/null +++ b/ref/vk/vk_entity_data.c @@ -0,0 +1,57 @@ +#include "vk_entity_data.h" + +#include "vk_common.h" // ASSERT + +#include // NULL + +// TODO proper hash map with dynamic size, etc +#define MAX_ENTITIES 1024 + +typedef struct { + const struct cl_entity_s *entity; + void *userdata; + entity_data_dtor_f *dtor; +} entity_data_cache_entry_t; + +struct { + int entries_count; + entity_data_cache_entry_t entries[MAX_ENTITIES]; +} g_entdata; + +void* VK_EntityDataGet(const struct cl_entity_s* entity) { + for (int i = 0; i < g_entdata.entries_count; ++i) { + entity_data_cache_entry_t *const entry = g_entdata.entries + i; + if (entry->entity == entity) + return entry->userdata; + } + + return NULL; +} + +void VK_EntityDataClear(void) { + for (int i = 0; i < g_entdata.entries_count; ++i) { + entity_data_cache_entry_t *const entry = g_entdata.entries + i; + entry->dtor(entry->userdata); + } + + g_entdata.entries_count = 0; +} + +void VK_EntityDataSet(const struct cl_entity_s* entity, void *userdata, entity_data_dtor_f *dtor) { + for (int i = 0; i < g_entdata.entries_count; ++i) { + entity_data_cache_entry_t *const entry = g_entdata.entries + i; + if (entry->entity == entity) { + entry->dtor(entry->userdata); + entry->userdata = userdata; + entry->dtor = dtor; + return; + } + } + + 
ASSERT(g_entdata.entries_count < MAX_ENTITIES); + entity_data_cache_entry_t *const entry = g_entdata.entries + g_entdata.entries_count; + entry->entity = entity; + entry->userdata = userdata; + entry->dtor = dtor; + ++g_entdata.entries_count; +} diff --git a/ref/vk/vk_entity_data.h b/ref/vk/vk_entity_data.h new file mode 100644 index 00000000..a9bfb0c0 --- /dev/null +++ b/ref/vk/vk_entity_data.h @@ -0,0 +1,16 @@ +#pragma once + +struct cl_entity_s; +void* VK_EntityDataGet(const struct cl_entity_s*); + +typedef void (entity_data_dtor_f)(void*); + +// Will destroy and overwrite the older userdata if it exists. +// TODO: Make sure that the older userdata is not used (i.e. in parallel on GPU for rendering a still in-flight frame). +// This'd require a proper usage tracking (e.g. using refcounts) with changes to the rest of the renderer. +// Someday... +void VK_EntityDataSet(const struct cl_entity_s*, void *userdata, entity_data_dtor_f *dtor); + +void VK_EntityDataClear(void); + +// TODO a function to LRU-clear userdata that hasn't been used for a few frames diff --git a/ref/vk/vk_geometry.c b/ref/vk/vk_geometry.c index fd6609c1..3ba861ea 100644 --- a/ref/vk/vk_geometry.c +++ b/ref/vk/vk_geometry.c @@ -1,7 +1,9 @@ #include "vk_geometry.h" #include "vk_buffer.h" #include "vk_staging.h" -#include "vk_framectl.h" // MAX_CONCURRENT_FRAMES +#include "r_speeds.h" + +#define MODULE_NAME "geom" #define MAX_BUFFER_VERTICES_STATIC (128 * 1024) #define MAX_BUFFER_INDICES_STATIC (MAX_BUFFER_VERTICES_STATIC * 3) @@ -13,17 +15,85 @@ #define GEOMETRY_BUFFER_SIZE (GEOMETRY_BUFFER_STATIC_SIZE + GEOMETRY_BUFFER_DYNAMIC_SIZE) +// TODO profiler counters + static struct { vk_buffer_t buffer; - r_debuffer_t alloc; + r_blocks_t alloc; + + struct { + int vertices, indices; + int dyn_vertices, dyn_indices; + } stats; } g_geom; -qboolean R_GeometryBufferAllocAndLock( r_geometry_buffer_lock_t *lock, int vertex_count, int index_count, r_geometry_lifetime_t lifetime ) { +r_geometry_range_t 
R_GeometryRangeAlloc(int vertices, int indices) { + const uint32_t vertices_size = vertices * sizeof(vk_vertex_t); + const uint32_t indices_size = indices * sizeof(uint16_t); + const uint32_t total_size = vertices_size + indices_size; + + r_geometry_range_t ret = { + .block_handle = R_BlockAllocLong(&g_geom.alloc, total_size, sizeof(vk_vertex_t)), + }; + + if (!ret.block_handle.size) + return ret; + + ret.vertices.unit_offset = ret.block_handle.offset / sizeof(vk_vertex_t); + ret.indices.unit_offset = (ret.block_handle.offset + vertices_size) / sizeof(uint16_t); + + ret.vertices.count = vertices; + ret.indices.count = indices; + + g_geom.stats.indices += indices; + g_geom.stats.vertices += vertices; + + return ret; +} + +void R_GeometryRangeFree(const r_geometry_range_t* range) { + R_BlockRelease(&range->block_handle); + + g_geom.stats.indices -= range->indices.count; + g_geom.stats.vertices -= range->vertices.count; +} + +r_geometry_range_lock_t R_GeometryRangeLock(const r_geometry_range_t *range) { + const vk_staging_buffer_args_t staging_args = { + .buffer = g_geom.buffer.buffer, + .offset = range->block_handle.offset, + .size = range->block_handle.size, + .alignment = 4, + }; + + const vk_staging_region_t staging = R_VkStagingLockForBuffer(staging_args); + ASSERT(staging.ptr); + + const uint32_t vertices_size = range->vertices.count * sizeof(vk_vertex_t); + + ASSERT( range->block_handle.offset % sizeof(vk_vertex_t) == 0 ); + ASSERT( (range->block_handle.offset + vertices_size) % sizeof(uint16_t) == 0 ); + + return (r_geometry_range_lock_t){ + .vertices = (vk_vertex_t *)staging.ptr, + .indices = (uint16_t *)((char*)staging.ptr + vertices_size), + .impl_ = { + .staging_handle = staging.handle, + }, + }; +} + +void R_GeometryRangeUnlock(const r_geometry_range_lock_t *lock) { + R_VkStagingUnlock(lock->impl_.staging_handle); +} + +qboolean R_GeometryBufferAllocOnceAndLock(r_geometry_buffer_lock_t *lock, int vertex_count, int index_count) { const uint32_t 
vertices_size = vertex_count * sizeof(vk_vertex_t); const uint32_t indices_size = index_count * sizeof(uint16_t); const uint32_t total_size = vertices_size + indices_size; - const uint32_t offset = R_DEBuffer_Alloc(&g_geom.alloc, (lifetime == LifetimeSingleFrame) ? LifetimeDynamic : LifetimeStatic, total_size, sizeof(vk_vertex_t)); + const uint32_t offset = R_BlockAllocOnce(&g_geom.alloc, total_size, sizeof(vk_vertex_t)); + if (offset == ALO_ALLOC_FAILED) { /* gEngine.Con_Printf(S_ERROR "Cannot allocate %s geometry buffer for %d vertices (%d bytes) and %d indices (%d bytes)\n", */ /* lifetime == LifetimeSingleFrame ? "dynamic" : "static", */ @@ -64,6 +134,9 @@ qboolean R_GeometryBufferAllocAndLock( r_geometry_buffer_lock_t *lock, int verte }; } + g_geom.stats.dyn_vertices += vertex_count; + g_geom.stats.dyn_indices += index_count; + return true; } @@ -72,7 +145,9 @@ void R_GeometryBufferUnlock( const r_geometry_buffer_lock_t *lock ) { } void R_GeometryBuffer_MapClear( void ) { - R_DEBuffer_Init(&g_geom.alloc, GEOMETRY_BUFFER_STATIC_SIZE, GEOMETRY_BUFFER_DYNAMIC_SIZE); + // Obsolete, don't really need to do anything + // TODO for diag/debug reasons we might want to check that there are no leaks, i.e. + // allocated blocks count remains constant and doesn't grow between maps } qboolean R_GeometryBuffer_Init(void) { @@ -83,16 +158,24 @@ qboolean R_GeometryBuffer_Init(void) { (vk_core.rtx ? 
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0))) return false; - R_GeometryBuffer_MapClear(); +#define EXPECTED_ALLOCS 1024 + R_BlocksCreate(&g_geom.alloc, GEOMETRY_BUFFER_SIZE, GEOMETRY_BUFFER_DYNAMIC_SIZE, EXPECTED_ALLOCS); + + R_SPEEDS_METRIC(g_geom.alloc.allocated_long, "used", kSpeedsMetricBytes); + R_SPEEDS_METRIC(g_geom.stats.vertices, "vertices", kSpeedsMetricCount); + R_SPEEDS_METRIC(g_geom.stats.indices, "indices", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_geom.stats.dyn_vertices, "dyn_vertices", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_geom.stats.dyn_indices, "dyn_indices", kSpeedsMetricCount); return true; } void R_GeometryBuffer_Shutdown(void) { + R_BlocksDestroy(&g_geom.alloc); VK_BufferDestroy( &g_geom.buffer ); } void R_GeometryBuffer_Flip(void) { - R_DEBuffer_Flip(&g_geom.alloc); + R_BlocksClearOnce(&g_geom.alloc); } VkBuffer R_GeometryBuffer_Get(void) { diff --git a/ref/vk/vk_geometry.h b/ref/vk/vk_geometry.h index 90aa31d9..2ba36165 100644 --- a/ref/vk/vk_geometry.h +++ b/ref/vk/vk_geometry.h @@ -1,5 +1,6 @@ #pragma once #include "vk_common.h" +#include "r_block.h" #include "vk_core.h" #include @@ -24,6 +25,35 @@ typedef struct vk_vertex_s { float pad4_[3]; } vk_vertex_t; +typedef struct { + struct { + int count, unit_offset; + } vertices; + + struct { + int count, unit_offset; + } indices; + + r_block_t block_handle; +} r_geometry_range_t; + +// Allocates a range in geometry buffer with a long lifetime +r_geometry_range_t R_GeometryRangeAlloc(int vertices, int indices); +void R_GeometryRangeFree(const r_geometry_range_t*); + +typedef struct { + vk_vertex_t *vertices; + uint16_t *indices; + + struct { + int staging_handle; + } impl_; +} r_geometry_range_lock_t; + +// Lock staging memory for uploading +r_geometry_range_lock_t R_GeometryRangeLock(const r_geometry_range_t *range); +void R_GeometryRangeUnlock(const r_geometry_range_lock_t *lock); + typedef struct { struct { vk_vertex_t *ptr; @@ -47,9 +77,8 @@ typedef enum { LifetimeSingleFrame } 
r_geometry_lifetime_t; -qboolean R_GeometryBufferAllocAndLock( r_geometry_buffer_lock_t *lock, int vertex_count, int index_count, r_geometry_lifetime_t lifetime ); +qboolean R_GeometryBufferAllocOnceAndLock(r_geometry_buffer_lock_t *lock, int vertex_count, int index_count); void R_GeometryBufferUnlock( const r_geometry_buffer_lock_t *lock ); -//void R_VkGeometryBufferFree( int handle ); void R_GeometryBuffer_MapClear( void ); // Free the entire buffer for a new map diff --git a/ref/vk/vk_light.c b/ref/vk/vk_light.c index 6c5f8583..74672345 100644 --- a/ref/vk/vk_light.c +++ b/ref/vk/vk_light.c @@ -10,6 +10,8 @@ #include "profiler.h" #include "vk_staging.h" #include "r_speeds.h" +#include "vk_logs.h" +#include "vk_framectl.h" #include "mod_local.h" #include "xash3d_mathlib.h" @@ -23,6 +25,9 @@ #include "pm_defs.h" #include "pmtrace.h" +#define MODULE_NAME "light" +#define LOG_MODULE LogModule_Lights + #define PROFILER_SCOPES(X) \ X(finalize , "RT_LightsFrameEnd"); \ X(emissive_surface, "VK_LightsAddEmissiveSurface"); \ @@ -104,16 +109,15 @@ qboolean VK_LightsInit( void ) { return false; } - R_SpeedsRegisterMetric(&g_lights_.stats.dirty_cells, "lights_dirty_cells", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_lights_.stats.dirty_cells_size, "lights_dirty_cells_size", kSpeedsMetricBytes); - R_SpeedsRegisterMetric(&g_lights_.stats.ranges_uploaded, "lights_ranges_uploaded", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_lights_.num_polygons, "lights_polygons", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_lights_.num_point_lights, "lights_point", kSpeedsMetricCount); - - R_SpeedsRegisterMetric(&g_lights_.stats.dynamic_polygons, "lights_polygons_dynamic", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_lights_.stats.dynamic_points, "lights_point_dynamic", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_lights_.stats.dlights, "lights_dlights", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_lights_.stats.elights, "lights_elights", kSpeedsMetricCount); + 
R_SPEEDS_COUNTER(g_lights_.stats.dirty_cells, "dirty_cells", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.stats.dirty_cells_size, "dirty_cells_size", kSpeedsMetricBytes); + R_SPEEDS_COUNTER(g_lights_.stats.ranges_uploaded, "ranges_uploaded", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.num_polygons, "polygons", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.num_point_lights, "points", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.stats.dynamic_polygons, "polygons_dynamic", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.stats.dynamic_points, "points_dynamic", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.stats.dlights, "dlights", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_lights_.stats.elights, "elights", kSpeedsMetricCount); return true; } @@ -153,7 +157,7 @@ static struct { } g_lights_bsp = {0}; -static void loadRadData( const model_t *map, const char *fmt, ... ) { +static qboolean loadRadData( const model_t *map, const char *fmt, ... ) { fs_offset_t size; char *data; byte *buffer; @@ -167,11 +171,11 @@ static void loadRadData( const model_t *map, const char *fmt, ... ) { buffer = gEngine.fsapi->LoadFile( filename, &size, false); if (!buffer) { - gEngine.Con_Printf(S_ERROR "Couldn't load RAD data from file %s, the map will be completely black\n", filename); - return; + DEBUG("Couldn't load RAD data from file %s", filename); + return false; } - gEngine.Con_Reportf("Loading RAD data from file %s\n", filename); + DEBUG("Loading RAD data from file %s", filename); data = (char*)buffer; for (;;) { @@ -189,7 +193,7 @@ static void loadRadData( const model_t *map, const char *fmt, ... 
) { name[0] = '\0'; num = sscanf(data, "%s %f %f %f %f", name, &r, &g, &b, &scale); - gEngine.Con_Printf("raw rad entry (%d): %s %f %f %f %f\n", num, name, r, g, b, scale); + //DEBUG("raw rad entry (%d): %s %f %f %f %f", num, name, r, g, b, scale); if (Q_strstr(name, "//") != NULL) { num = 0; } @@ -204,12 +208,12 @@ static void loadRadData( const model_t *map, const char *fmt, ... ) { } else if (num == 4) { // Ok, rgb only, no scaling } else { - gEngine.Con_Printf( "skipping rad entry %s\n", name[0] ? name : "(empty)" ); + DEBUG( "skipping rad entry %s", name[0] ? name : "(empty)" ); num = 0; } if (num != 0) { - gEngine.Con_Printf("rad entry (%d): %s %f %f %f (%f)\n", num, name, r, g, b, scale); + DEBUG("rad entry (%d): %s %f %f %f (%f)", num, name, r, g, b, scale); { const char *wad_name = NULL; @@ -261,8 +265,10 @@ static void loadRadData( const model_t *map, const char *fmt, ... ) { // See DIRECT_SCALE in qrad/lightmap.c VectorScale(etex->emissive, 0.1f, etex->emissive); + DEBUG(" texture(%s?, %d) set emissive(%f, %f, %f)", texture_name, tex_id, etex->emissive[0], etex->emissive[1], etex->emissive[2]); + if (!enabled) - gEngine.Con_Reportf("rad entry %s disabled due to zero intensity\n", name); + DEBUG("rad entry %s disabled due to zero intensity", name); } } } @@ -274,6 +280,7 @@ static void loadRadData( const model_t *map, const char *fmt, ... 
) { } Mem_Free(buffer); + return true; } static void leafAccumPrepare( void ) { @@ -321,7 +328,7 @@ static int leafAccumAddPotentiallyVisibleFromLeaf(const model_t *const map, cons if (leafAccumAdd( pvs_leaf_index + 1 )) { leafs_added++; if (print_debug) - gEngine.Con_Reportf(" .%d", pvs_leaf_index + 1); + DEBUG(" .%d", pvs_leaf_index + 1); } } } @@ -336,7 +343,7 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMapSurface( const msurface_t *surf ) { const qboolean verbose_debug = false; if (surf_index < 0 || surf_index >= g_lights_bsp.num_surfaces) { - gEngine.Con_Printf(S_ERROR "FIXME not implemented: attempting to add non-static polygon light\n"); + ERR("FIXME not implemented: attempting to add non-static polygon light"); return NULL; } @@ -350,16 +357,16 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMapSurface( const msurface_t *surf ) { // Enumerate all the map leafs and pick ones that have this surface referenced if (verbose_debug) - gEngine.Con_Reportf("Collecting visible leafs for surface %d:", surf_index); + DEBUG("Collecting visible leafs for surface %d:", surf_index); for (int i = 1; i <= map->numleafs; ++i) { const mleaf_t *leaf = map->leafs + i; - //if (verbose_debug) gEngine.Con_Reportf(" leaf %d(c%d)/%d:", i, leaf->cluster, map->numleafs); + //if (verbose_debug) DEBUG(" leaf %d(c%d)/%d:", i, leaf->cluster, map->numleafs); for (int j = 0; j < leaf->nummarksurfaces; ++j) { const msurface_t *leaf_surf = leaf->firstmarksurface[j]; if (leaf_surf != surf) { /* if (verbose_debug) { */ /* const int leaf_surf_index = leaf_surf - map->surfaces; */ - /* gEngine.Con_Reportf(" !%d", leaf_surf_index); */ + /* DEBUG(" !%d", leaf_surf_index); */ /* } */ continue; } @@ -367,7 +374,7 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMapSurface( const msurface_t *surf ) { // FIXME split direct leafs marking from pvs propagation leafs_direct++; if (leafAccumAdd( i )) { - if (verbose_debug) gEngine.Con_Reportf(" %d", i); + if (verbose_debug) DEBUG(" %d", i); } else { // This leaf 
was already added earlier by PVS // but it really should be counted as direct @@ -378,10 +385,10 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMapSurface( const msurface_t *surf ) { leafs_pvs += leafAccumAddPotentiallyVisibleFromLeaf(map, leaf, verbose_debug); } - //if (verbose_debug) gEngine.Con_Reportf("\n"); + //if (verbose_debug) DEBUG("\n"); } if (verbose_debug) - gEngine.Con_Reportf(" (sum=%d, direct=%d, pvs=%d)\n", g_lights_bsp.accum.count, leafs_direct, leafs_pvs); + DEBUG(" (sum=%d, direct=%d, pvs=%d)", g_lights_bsp.accum.count, leafs_direct, leafs_pvs); leafAccumFinalize(); @@ -431,7 +438,7 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMovingSurface( const msurface_t *surf, Matrix3x4_VectorTransform(*transform_row, bbox_center, origin); if (debug_dump_lights.enabled) { - gEngine.Con_Reportf("\torigin = %f, %f, %f, R = %f\n", + DEBUG("\torigin = %f, %f, %f, R = %f", origin[0], origin[1], origin[2], radius ); } @@ -442,7 +449,7 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMovingSurface( const msurface_t *surf, // (origin + radius will accidentally touch leafs that are really should not be affected) gEngine.R_FatPVS(origin, radius, g_lights_bsp.accum.visbytes, /*merge*/ false, /*fullvis*/ false); if (debug_dump_lights.enabled) - gEngine.Con_Reportf("Collecting visible leafs for moving surface %p: %f,%f,%f %f: ", surf, + DEBUG("Collecting visible leafs for moving surface %p: %f,%f,%f %f: ", surf, origin[0], origin[1], origin[2], radius); for (int i = 0; i <= map->numleafs; ++i) { @@ -454,7 +461,7 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMovingSurface( const msurface_t *surf, if (leafAccumAdd( i + 1 )) { if (debug_dump_lights.enabled) - gEngine.Con_Reportf(" %d", i + 1); + DEBUG(" %d", i + 1); } else { // This leaf was already added earlier by PVS // but it really should be counted as direct @@ -463,7 +470,7 @@ vk_light_leaf_set_t *getMapLeafsAffectedByMovingSurface( const msurface_t *surf, } if (debug_dump_lights.enabled) - gEngine.Con_Reportf(" (sum=%d, 
direct=%d, pvs=%d)\n", g_lights_bsp.accum.count, leafs_direct, leafs_pvs); + DEBUG(" (sum=%d, direct=%d, pvs=%d)", g_lights_bsp.accum.count, leafs_direct, leafs_pvs); leafAccumFinalize(); @@ -509,7 +516,7 @@ void RT_LightsNewMap( const struct model_s *map ) { ASSERT(g_lights.map.grid_cells < MAX_LIGHT_CLUSTERS); - gEngine.Con_Reportf("Map mins:(%f, %f, %f), maxs:(%f, %f, %f), size:(%f, %f, %f), min_cell:(%f, %f, %f) cells:(%d, %d, %d); total: %d\n", + DEBUG("Map mins:(%f, %f, %f), maxs:(%f, %f, %f), size:(%f, %f, %f), min_cell:(%f, %f, %f) cells:(%d, %d, %d); total: %d", map->mins[0], map->mins[1], map->mins[2], map->maxs[0], map->maxs[1], map->maxs[2], map_size[0], map_size[1], map_size[2], @@ -534,7 +541,7 @@ static qboolean addSurfaceLightToCell( int cell_index, int polygon_light_index ) } if (debug_dump_lights.enabled) { - gEngine.Con_Reportf(" adding polygon light %d to cell %d (count=%d)\n", polygon_light_index, cell_index, cluster->num_polygons+1); + DEBUG(" adding polygon light %d to cell %d (count=%d)", polygon_light_index, cell_index, cluster->num_polygons+1); } cluster->polygons[cluster->num_polygons++] = polygon_light_index; @@ -552,7 +559,7 @@ static qboolean addLightToCell( int cell_index, int light_index ) { return false; if (debug_dump_lights.enabled) { - gEngine.Con_Reportf(" adding point light %d to cell %d (count=%d)\n", light_index, cell_index, cluster->num_point_lights+1); + DEBUG(" adding point light %d to cell %d (count=%d)", light_index, cell_index, cluster->num_point_lights+1); } cluster->point_lights[cluster->num_point_lights++] = light_index; @@ -609,7 +616,7 @@ static void addLightIndexToLeaf( const mleaf_t *leaf, int index ) { const int max_z = ceilf(leaf->minmaxs[5] / LIGHT_GRID_CELL_SIZE); if (debug_dump_lights.enabled) { - gEngine.Con_Reportf(" adding leaf %d min=(%d, %d, %d), max=(%d, %d, %d) total=%d\n", + DEBUG(" adding leaf %d min=(%d, %d, %d), max=(%d, %d, %d) total=%d", leaf->cluster, min_x, min_y, min_z, max_x, max_y, max_z, 
@@ -685,7 +692,7 @@ static int addPointLight( const vec3_t origin, const vec3_t color, float radius, } if (debug_dump_lights.enabled) { - gEngine.Con_Printf("point light %d: origin=(%f %f %f) R=%f color=(%f %f %f)\n", index, + DEBUG("point light %d: origin=(%f %f %f) R=%f color=(%f %f %f)", index, origin[0], origin[1], origin[2], radius, color[0], color[1], color[2]); } @@ -717,7 +724,7 @@ static int addSpotLight( const vk_light_entity_t *le, float radius, int lightsty } if (debug_dump_lights.enabled) { - gEngine.Con_Printf("%s light %d: origin=(%f %f %f) color=(%f %f %f) dir=(%f %f %f)\n", + DEBUG("%s light %d: origin=(%f %f %f) color=(%f %f %f) dir=(%f %f %f)", le->type == LightTypeEnvironment ? "environment" : "spot", index, le->origin[0], le->origin[1], le->origin[2], @@ -835,7 +842,7 @@ void RT_LightAddFlashlight(const struct cl_entity_s *ent, qboolean local_player le.stopdot2 = cosf(_cone2 * M_PI / 180.f); /* - gEngine.Con_Printf("flashlight: origin=(%f %f %f) color=(%f %f %f) dir=(%f %f %f)\n", + DEBUG("flashlight: origin=(%f %f %f) color=(%f %f %f) dir=(%f %f %f)", le.origin[0], le.origin[1], le.origin[2], le.color[0], le.color[1], le.color[2], le.dir[0], le.dir[1], le.dir[2]); @@ -916,8 +923,10 @@ void RT_LightsLoadBegin( const struct model_s *map ) { name_len -= 4; memset(g_lights_.map.emissive_textures, 0, sizeof(g_lights_.map.emissive_textures)); - loadRadData( map, "maps/lights.rad" ); - loadRadData( map, "%.*s.rad", name_len, map->name ); + const qboolean loaded = loadRadData( map, "maps/lights.rad" ) | loadRadData( map, "%.*s.rad", name_len, map->name ); + if (!loaded) { + ERR("No RAD files loaded. 
The map will be completely black"); + } } // Clear static lights counts @@ -960,15 +969,13 @@ qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id ) { ASSERT(texture_id >= 0); ASSERT(texture_id < MAX_TEXTURES); - { - vk_emissive_texture_t *const etex = g_lights_.map.emissive_textures + texture_id; - if (etex->set) { - VectorCopy(etex->emissive, out); - return true; - } else { - VectorClear(out); - return false; - } + vk_emissive_texture_t *const etex = g_lights_.map.emissive_textures + texture_id; + if (etex->set) { + VectorCopy(etex->emissive, out); + return true; + } else { + VectorClear(out); + return false; } } @@ -984,7 +991,7 @@ static void addPolygonLightIndexToLeaf(const mleaf_t* leaf, int poly_index) { const qboolean not_visible = false; //TODO static_map && !canSurfaceLightAffectAABB(world, geom->surf, esurf->emissive, leaf->minmaxs); if (debug_dump_lights.enabled) { - gEngine.Con_Reportf(" adding leaf %d min=(%d, %d, %d), max=(%d, %d, %d) total=%d\n", + DEBUG(" adding leaf %d min=(%d, %d, %d), max=(%d, %d, %d) total=%d", leaf->cluster, min_x, min_y, min_z, max_x, max_y, max_z, @@ -1059,8 +1066,13 @@ static void addPolygonLeafSetToClusters(const vk_light_leaf_set_t *leafs, int po } int RT_LightAddPolygon(const rt_light_add_polygon_t *addpoly) { + // FIXME We're adding lights directly from vk_brush.c w/o knowing whether current frame is + // ray traced. If not, this will break. 
+ if (addpoly->dynamic && !vk_frame.rtx_enabled) + return -1; + if (g_lights_.num_polygons == MAX_SURFACE_LIGHTS) { - gEngine.Con_Printf(S_ERROR "Max number of polygon lights %d reached\n", MAX_SURFACE_LIGHTS); + ERR("Max number of polygon lights %d reached", MAX_SURFACE_LIGHTS); return -1; } @@ -1103,7 +1115,7 @@ int RT_LightAddPolygon(const rt_light_add_polygon_t *addpoly) { VectorM(1.f / poly->vertices.count, poly->center, poly->center); if (!addpoly->dynamic || debug_dump_lights.enabled) { - gEngine.Con_Reportf("added polygon light index=%d color=(%f, %f, %f) center=(%f, %f, %f) plane=(%f, %f, %f, %f) area=%f num_vertices=%d\n", + DEBUG("added polygon light index=%d color=(%f, %f, %f) center=(%f, %f, %f) plane=(%f, %f, %f, %f) area=%f num_vertices=%d", g_lights_.num_polygons, poly->emissive[0], poly->emissive[1], @@ -1329,7 +1341,7 @@ void RT_LightsFrameEnd( void ) { if (debug_dump_lights.enabled) { #if 0 // Print light grid stats - gEngine.Con_Reportf("Emissive surfaces found: %d\n", g_lights_.num_polygons); + DEBUG("Emissive surfaces found: %d", g_lights_.num_polygons); { #define GROUPSIZE 4 @@ -1340,10 +1352,10 @@ void RT_LightsFrameEnd( void ) { histogram[hist_index]++; } - gEngine.Con_Reportf("Built %d light clusters. Stats:\n", g_lights.map.grid_cells); - gEngine.Con_Reportf(" 0: %d\n", histogram[0]); + DEBUG("Built %d light clusters. 
Stats:", g_lights.map.grid_cells); + DEBUG(" 0: %d", histogram[0]); for (int i = 1; i < ARRAYSIZE(histogram); ++i) - gEngine.Con_Reportf(" %d-%d: %d\n", + DEBUG(" %d-%d: %d", (i - 1) * GROUPSIZE, i * GROUPSIZE - 1, histogram[i]); @@ -1354,7 +1366,7 @@ void RT_LightsFrameEnd( void ) { for (int i = 0; i < g_lights.map.grid_cells; ++i) { const vk_lights_cell_t *cluster = g_lights.cells + i; if (cluster->num_polygons > 0) { - gEngine.Con_Reportf(" cluster %d: polygons=%d\n", i, cluster->num_polygons); + DEBUG(" cluster %d: polygons=%d", i, cluster->num_polygons); } for (int j = 0; j < cluster->num_polygons; ++j) { @@ -1365,7 +1377,7 @@ void RT_LightsFrameEnd( void ) { } } - gEngine.Con_Reportf("Clusters with filtered lights: %d\n", num_clusters_with_lights_in_range); + DEBUG("Clusters with filtered lights: %d", num_clusters_with_lights_in_range); } #endif } diff --git a/ref/vk/vk_logs.c b/ref/vk/vk_logs.c new file mode 100644 index 00000000..970b4022 --- /dev/null +++ b/ref/vk/vk_logs.c @@ -0,0 +1,47 @@ +#include "vk_logs.h" +#include "vk_cvar.h" + +uint32_t g_log_debug_bits = 0; + +static const struct log_pair_t { + const char *name; + uint32_t bit; +} g_log_module_pairs[] = { + {"misc", LogModule_Misc}, + {"tex", LogModule_Textures}, + {"brush", LogModule_Brush}, + {"light", LogModule_Lights}, + {"studio", LogModule_Studio}, + {"patch", LogModule_Patch}, + {"mat", LogModule_Material}, + {"meat", LogModule_Meatpipe}, + {"rt", LogModule_RT}, +}; + +void VK_LogsReadCvar(void) { + g_log_debug_bits = 0; + const char *p = vk_debug_log->string; + while (*p) { + const char *next = Q_strchrnul(p, ','); + const const_string_view_t name = {p, next - p}; + uint32_t bit = 0; + + for (int i = 0; i < COUNTOF(g_log_module_pairs); ++i) { + const struct log_pair_t *const pair = g_log_module_pairs + i; + if (stringViewCmp(name, pair->name) == 0) { + bit = pair->bit; + break; + } + } + + if (!bit) { + gEngine.Con_Reportf(S_ERROR "Unknown log module \"%.*s\"\n", name.len, name.s); + } + 
+ g_log_debug_bits |= bit; + + if (!*next) + break; + p = next + 1; + } +} diff --git a/ref/vk/vk_logs.h b/ref/vk/vk_logs.h new file mode 100644 index 00000000..44e13333 --- /dev/null +++ b/ref/vk/vk_logs.h @@ -0,0 +1,47 @@ +#pragma once + +#include "vk_common.h" + +enum { + LogModule_Misc = (1<<0), + LogModule_Textures = (1<<1), + LogModule_Brush = (1<<2), + LogModule_Lights = (1<<3), + LogModule_Studio = (1<<4), + LogModule_Patch = (1<<5), + LogModule_Material = (1<<6), + LogModule_Meatpipe = (1<<7), + LogModule_RT = (1<<8), +}; + +extern uint32_t g_log_debug_bits; + +// TODO: +// - load bits early at startup somehow. cvar is empty at init for some reason +// - module name in message +// - file:line in message + +#define DEBUG(msg, ...) \ + do { \ + if (g_log_debug_bits & (LOG_MODULE)) { \ + gEngine.Con_Reportf("vk: " msg "\n", ##__VA_ARGS__); \ + } \ + } while(0) + +#define WARN(msg, ...) \ + do { \ + gEngine.Con_Printf(S_WARN "vk: " msg "\n", ##__VA_ARGS__); \ + } while(0) + +#define ERR(msg, ...) \ + do { \ + gEngine.Con_Printf(S_ERROR "vk: " msg "\n", ##__VA_ARGS__); \ + } while(0) + +#define INFO(msg, ...) 
\ + do { \ + gEngine.Con_Printf("vk: " msg "\n", ##__VA_ARGS__); \ + } while(0) + +// Read debug-enabled modules from cvar +void VK_LogsReadCvar(void); diff --git a/ref/vk/vk_mapents.c b/ref/vk/vk_mapents.c index b028c11e..fde3ee2c 100644 --- a/ref/vk/vk_mapents.c +++ b/ref/vk/vk_mapents.c @@ -2,12 +2,15 @@ #include "vk_mapents.h" #include "vk_core.h" // TODO we need only pool from there, not the entire vulkan garbage #include "vk_textures.h" +#include "vk_logs.h" #include "eiface.h" // ARRAYSIZE #include "xash3d_mathlib.h" #include #include +#define LOG_MODULE LogModule_Patch + xvk_map_entities_t g_map_entities; static struct { @@ -19,7 +22,7 @@ static unsigned parseEntPropWadList(const char* value, string *out, unsigned bit int dst_left = sizeof(string) - 2; // ; \0 char *dst = *out; *dst = '\0'; - gEngine.Con_Reportf("WADS: %s\n", value); + DEBUG("WADS: %s", value); for (; *value;) { const char *file_begin = value; @@ -31,7 +34,7 @@ static unsigned parseEntPropWadList(const char* value, string *out, unsigned bit { const int len = value - file_begin; - gEngine.Con_Reportf("WAD: %.*s\n", len, file_begin); + DEBUG("WAD: %.*s", len, file_begin); if (len < dst_left) { Q_strncpy(dst, file_begin, len + 1); @@ -46,7 +49,7 @@ static unsigned parseEntPropWadList(const char* value, string *out, unsigned bit if (*value) value++; } - gEngine.Con_Reportf("wad list: %s\n", *out); + DEBUG("wad list: %s", *out); return bit; } @@ -78,7 +81,7 @@ static unsigned parseEntPropIntArray(const char* value, int_array_t *out, unsign } if (*value) { - gEngine.Con_Printf(S_ERROR "Error parsing mapents patch IntArray (wrong format? too many entries (max=%d)), portion not parsed: %s\n", MAX_INT_ARRAY_SIZE, value); + ERR("Error parsing mapents patch IntArray (wrong format? 
too many entries (max=%d)), portion not parsed: %s", MAX_INT_ARRAY_SIZE, value); } return retval; } @@ -86,7 +89,7 @@ static unsigned parseEntPropIntArray(const char* value, int_array_t *out, unsign static unsigned parseEntPropString(const char* value, string *out, unsigned bit) { const int len = Q_strlen(value); if (len >= sizeof(string)) - gEngine.Con_Printf(S_ERROR "Map entity value '%s' is too long, max length is %d\n", + ERR("Map entity value '%s' is too long, max length is %d", value, (int)sizeof(string)); Q_strncpy(*out, value, sizeof(*out)); return bit; @@ -234,7 +237,7 @@ static void fillLightFromProps( vk_light_entity_t *le, const entity_props_t *pro weirdGoldsrcLightScaling(le->color); } - gEngine.Con_Reportf("%s light %d (ent=%d): %s targetname=%s color=(%f %f %f) origin=(%f %f %f) style=%d R=%f dir=(%f %f %f) stopdot=(%f %f)\n", + DEBUG("%s light %d (ent=%d): %s targetname=%s color=(%f %f %f) origin=(%f %f %f) style=%d R=%f dir=(%f %f %f) stopdot=(%f %f)", patch ? "Patch" : "Added", g_map_entities.num_lights, entity_index, le->type == LightTypeEnvironment ? "environment" : le->type == LightTypeSpot ? 
"spot" : "point", @@ -253,7 +256,7 @@ static void addLightEntity( const entity_props_t *props, unsigned have_fields ) unsigned expected_fields = 0; if (g_map_entities.num_lights == ARRAYSIZE(g_map_entities.lights)) { - gEngine.Con_Printf(S_ERROR "Too many lights entities in map\n"); + ERR("Too many lights entities in map"); return; } @@ -284,7 +287,7 @@ static void addLightEntity( const entity_props_t *props, unsigned have_fields ) } if ((have_fields & expected_fields) != expected_fields) { - gEngine.Con_Printf(S_ERROR "Missing some fields for light entity\n"); + ERR("Missing some fields for light entity"); return; } @@ -309,11 +312,11 @@ static void addLightEntity( const entity_props_t *props, unsigned have_fields ) static void addTargetEntity( const entity_props_t *props ) { xvk_mapent_target_t *target = g_map_entities.targets + g_map_entities.num_targets; - gEngine.Con_Reportf("Adding target entity %s at (%f, %f, %f)\n", + DEBUG("Adding target entity %s at (%f, %f, %f)", props->targetname, props->origin[0], props->origin[1], props->origin[2]); if (g_map_entities.num_targets == MAX_MAPENT_TARGETS) { - gEngine.Con_Printf(S_ERROR "Too many map target entities\n"); + ERR("Too many map target entities"); return; } @@ -337,10 +340,10 @@ static void readWorldspawn( const entity_props_t *props ) { } static void readFuncWall( const entity_props_t *const props, uint32_t have_fields, int props_count ) { - gEngine.Con_Reportf("func_wall entity=%d model=\"%s\", props_count=%d\n", g_map_entities.entity_count, (have_fields & Field_model) ? props->model : "N/A", props_count); + DEBUG("func_wall entity=%d model=\"%s\", props_count=%d", g_map_entities.entity_count, (have_fields & Field_model) ? 
props->model : "N/A", props_count); if (g_map_entities.func_walls_count >= MAX_FUNC_WALL_ENTITIES) { - gEngine.Con_Printf(S_ERROR "Too many func_wall entities, max supported = %d\n", MAX_FUNC_WALL_ENTITIES); + ERR("Too many func_wall entities, max supported = %d", MAX_FUNC_WALL_ENTITIES); return; } @@ -388,7 +391,7 @@ static void addPatchSurface( const entity_props_t *props, uint32_t have_fields ) const int index = props->_xvk_surface_id.values[i]; xvk_patch_surface_t *psurf = NULL; if (index < 0 || index >= num_surfaces) { - gEngine.Con_Printf(S_ERROR "Incorrect patch for surface_index %d where numsurfaces=%d\n", index, num_surfaces); + ERR("Incorrect patch for surface_index %d where numsurfaces=%d", index, num_surfaces); continue; } @@ -405,14 +408,14 @@ static void addPatchSurface( const entity_props_t *props, uint32_t have_fields ) psurf = g_patch.surfaces + index; if (should_remove) { - gEngine.Con_Reportf("Patch: surface %d removed\n", index); + DEBUG("Patch: surface %d removed", index); psurf->flags = Patch_Surface_Delete; continue; } if (have_fields & Field__xvk_texture) { const int tex_id = XVK_FindTextureNamedLike( props->_xvk_texture ); - gEngine.Con_Reportf("Patch for surface %d with texture \"%s\" -> %d\n", index, props->_xvk_texture, tex_id); + DEBUG("Patch for surface %d with texture \"%s\" -> %d", index, props->_xvk_texture, tex_id); psurf->tex_id = tex_id; // Find texture_t for this index @@ -431,7 +434,7 @@ static void addPatchSurface( const entity_props_t *props, uint32_t have_fields ) if (have_fields & Field__light) { VectorScale(props->_light, 0.1f, psurf->emissive); psurf->flags |= Patch_Surface_Emissive; - gEngine.Con_Reportf("Patch for surface %d: assign emissive %f %f %f\n", index, + DEBUG("Patch for surface %d: assign emissive %f %f %f", index, psurf->emissive[0], psurf->emissive[1], psurf->emissive[2] @@ -442,13 +445,13 @@ static void addPatchSurface( const entity_props_t *props, uint32_t have_fields ) Vector4Copy(props->_xvk_svec, 
psurf->s_vec); Vector4Copy(props->_xvk_tvec, psurf->t_vec); psurf->flags |= Patch_Surface_STvecs; - gEngine.Con_Reportf("Patch for surface %d: assign stvec\n", index); + DEBUG("Patch for surface %d: assign stvec", index); } if (have_fields & Field__xvk_tex_scale) { Vector2Copy(props->_xvk_tex_scale, psurf->tex_scale); psurf->flags |= Patch_Surface_TexScale; - gEngine.Con_Reportf("Patch for surface %d: assign tex scale %f %f\n", + DEBUG("Patch for surface %d: assign tex scale %f %f", index, psurf->tex_scale[0], psurf->tex_scale[1] ); } @@ -456,7 +459,7 @@ static void addPatchSurface( const entity_props_t *props, uint32_t have_fields ) if (have_fields & Field__xvk_tex_offset) { Vector2Copy(props->_xvk_tex_offset, psurf->tex_offset); psurf->flags |= Patch_Surface_TexOffset; - gEngine.Con_Reportf("Patch for surface %d: assign tex offset %f %f\n", + DEBUG("Patch for surface %d: assign tex offset %f %f", index, psurf->tex_offset[0], psurf->tex_offset[1] ); } @@ -470,7 +473,7 @@ static void patchLightEntity( const entity_props_t *props, int ent_id, uint32_t vk_light_entity_t *const light = g_map_entities.lights + index; if (have_fields == Field__xvk_ent_id) { - gEngine.Con_Reportf("Deleting light entity (%d of %d) with index=%d\n", index, g_map_entities.num_lights, ent_id); + DEBUG("Deleting light entity (%d of %d) with index=%d", index, g_map_entities.num_lights, ent_id); // Mark it as deleted light->entity_index = -1; @@ -488,7 +491,7 @@ static void patchFuncWallEntity( const entity_props_t *props, uint32_t have_fiel if (have_fields & Field_origin) VectorCopy(props->origin, fw->origin); - gEngine.Con_Reportf("Patching ent=%d func_wall=%d %f %f %f\n", fw->entity_index, index, fw->origin[0], fw->origin[1], fw->origin[2]); + DEBUG("Patching ent=%d func_wall=%d %f %f %f", fw->entity_index, index, fw->origin[0], fw->origin[1], fw->origin[2]); } static void patchEntity( const entity_props_t *props, uint32_t have_fields ) { @@ -497,7 +500,7 @@ static void patchEntity( const 
entity_props_t *props, uint32_t have_fields ) { for (int i = 0; i < props->_xvk_ent_id.num; ++i) { const int ei = props->_xvk_ent_id.values[i]; if (ei < 0 || ei >= g_map_entities.entity_count) { - gEngine.Con_Printf(S_ERROR "_xvk_ent_id value %d is out of bounds, max=%d\n", ei, g_map_entities.entity_count); + ERR("_xvk_ent_id value %d is out of bounds, max=%d", ei, g_map_entities.entity_count); continue; } @@ -512,7 +515,7 @@ static void patchEntity( const entity_props_t *props, uint32_t have_fields ) { patchFuncWallEntity(props, have_fields, ref->index); break; default: - gEngine.Con_Printf(S_WARN "vk_mapents: trying to patch unsupported entity %d class %d\n", ei, ref->class); + WARN("vk_mapents: trying to patch unsupported entity %d class %d", ei, ref->class); } } @@ -523,7 +526,7 @@ static void parseEntities( char *string, qboolean is_patch ) { int props_count = 0; entity_props_t values; char *pos = string; - //gEngine.Con_Reportf("ENTITIES: %s\n", pos); + //DEBUG("ENTITIES: %s", pos); for (;;) { char key[1024]; char value[1024]; @@ -578,7 +581,7 @@ static void parseEntities( char *string, qboolean is_patch ) { g_map_entities.entity_count++; if (g_map_entities.entity_count == MAX_MAP_ENTITIES) { - gEngine.Con_Printf(S_ERROR "vk_mapents: too many entities, skipping the rest");\ + ERR("vk_mapents: too many entities, skipping the rest");\ break; } continue; @@ -593,12 +596,12 @@ static void parseEntities( char *string, qboolean is_patch ) { if (Q_strcmp(key, #name) == 0) { \ const unsigned bit = parseEntProp##kind(value, &values.name, Field_##name); \ if (bit == 0) { \ - gEngine.Con_Printf( S_ERROR "Error parsing entity property " #name ", invalid value: %s\n", value); \ + ERR("Error parsing entity property " #name ", invalid value: %s", value); \ } else have_fields |= bit; \ } else ENT_PROP_LIST(READ_FIELD) { - //gEngine.Con_Reportf("Unknown field %s with value %s\n", key, value); + //DEBUG("Unknown field %s with value %s", key, value); } ++props_count; #undef 
CHECK_FIELD @@ -629,14 +632,14 @@ static void orientSpotlights( void ) { target = findTargetByName(light->target_entity); if (!target) { - gEngine.Con_Printf(S_ERROR "Couldn't find target entity '%s' for spot light %d\n", light->target_entity, i); + ERR("Couldn't find target entity '%s' for spot light %d", light->target_entity, i); continue; } VectorSubtract(target->origin, light->origin, light->dir); VectorNormalize(light->dir); - gEngine.Con_Reportf("Light %d patched direction towards '%s': %f %f %f\n", i, target->targetname, + DEBUG("Light %d patched direction towards '%s': %f %f %f", i, target->targetname, light->dir[0], light->dir[1], light->dir[2]); } } @@ -652,10 +655,10 @@ static void parsePatches( const model_t *const map ) { } Q_snprintf(filename, sizeof(filename), "luchiki/%s.patch", map->name); - gEngine.Con_Reportf("Loading patches from file \"%s\"\n", filename); + DEBUG("Loading patches from file \"%s\"", filename); data = gEngine.fsapi->LoadFile( filename, 0, false ); if (!data) { - gEngine.Con_Reportf("No patch file \"%s\"\n", filename); + DEBUG("No patch file \"%s\"", filename); return; } diff --git a/ref/vk/vk_materials.c b/ref/vk/vk_materials.c index 831b0706..e73cdd84 100644 --- a/ref/vk/vk_materials.c +++ b/ref/vk/vk_materials.c @@ -3,9 +3,12 @@ #include "vk_mapents.h" #include "vk_const.h" #include "profiler.h" +#include "vk_logs.h" #include +#define LOG_MODULE LogModule_Material + #define MAX_INCLUDE_DEPTH 4 static xvk_material_t k_default_material = { @@ -38,7 +41,7 @@ static struct { static int loadTexture( const char *filename, qboolean force_reload ) { const uint64_t load_begin_ns = aprof_time_now_ns(); const int tex_id = force_reload ? 
XVK_LoadTextureReplace( filename, NULL, 0, 0 ) : VK_LoadTexture( filename, NULL, 0, 0 ); - gEngine.Con_Reportf("Loaded texture %s => %d\n", filename, tex_id); + DEBUG("Loaded texture %s => %d", filename, tex_id); g_stats.texture_loads++; g_stats.texture_load_duration_ns += aprof_time_now_ns() - load_begin_ns; return tex_id ? tex_id : -1; @@ -75,7 +78,7 @@ static void loadMaterialsFromFile( const char *filename, int depth ) { string basecolor_map, normal_map, metal_map, roughness_map; - gEngine.Con_Reportf("Loading materials from %s\n", filename); + DEBUG("Loading materials from %s", filename); if ( !data ) return; @@ -115,7 +118,7 @@ static void loadMaterialsFromFile( const char *filename, int depth ) { MAKE_PATH(texture_path, name); \ const int tex_id = loadTexture(texture_path, force_reload); \ if (tex_id < 0) { \ - gEngine.Con_Printf(S_ERROR "Failed to load texture \"%s\" for "#name"\n", name); \ + ERR("Failed to load texture \"%s\" for "#name"", name); \ } else { \ current_material.field = tex_id; \ } \ @@ -140,7 +143,7 @@ static void loadMaterialsFromFile( const char *filename, int depth ) { current_material.metalness = 1.f; } - gEngine.Con_Reportf("Creating%s material for texture %s(%d)\n", create?" new":"", + DEBUG("Creating%s material for texture %s(%d)", create?" 
new":"", findTexture(current_material_index)->name, current_material_index); g_materials.materials[current_material_index] = current_material; @@ -169,7 +172,7 @@ static void loadMaterialsFromFile( const char *filename, int depth ) { MAKE_PATH(include_path, value); loadMaterialsFromFile( include_path, depth - 1); } else { - gEngine.Con_Printf(S_ERROR "material: max include depth %d reached when including '%s' from '%s'\n", MAX_INCLUDE_DEPTH, value, filename); + ERR("material: max include depth %d reached when including '%s' from '%s'", MAX_INCLUDE_DEPTH, value, filename); } } else { int *tex_id_dest = NULL; @@ -191,7 +194,7 @@ static void loadMaterialsFromFile( const char *filename, int depth ) { } else if (Q_stricmp(key, "base_color") == 0) { sscanf(value, "%f %f %f %f", ¤t_material.base_color[0], ¤t_material.base_color[1], ¤t_material.base_color[2], ¤t_material.base_color[3]); } else { - gEngine.Con_Printf(S_ERROR "Unknown material key \"%s\" on line `%.*s`\n", key, (int)(pos - line_begin), line_begin); + ERR("Unknown material key \"%s\" on line `%.*s`", key, (int)(pos - line_begin), line_begin); continue; } } @@ -249,7 +252,7 @@ void XVK_ReloadMaterials( void ) { // Print out statistics { const int duration_ms = (aprof_time_now_ns() - begin_time_ns) / 1000000ull; - gEngine.Con_Printf("Loading materials took %dms, .mat files parsed: %d (fread: %dms). Texture lookups: %d (%dms). Texture loads: %d (%dms).\n", + INFO("Loading materials took %dms, .mat files parsed: %d (fread: %dms). Texture lookups: %d (%dms). 
Texture loads: %d (%dms).", duration_ms, g_stats.mat_files_read, (int)(g_stats.material_file_read_duration_ns / 1000000ull), diff --git a/ref/vk/vk_math.c b/ref/vk/vk_math.c index 6a21101b..521ef35e 100644 --- a/ref/vk/vk_math.c +++ b/ref/vk/vk_math.c @@ -286,3 +286,25 @@ void computeTangent(vec3_t out_tangent, const vec3_t v0, const vec3_t v1, const out_tangent[1] = f * (duv2[1] * e1[1] - duv1[1] * e2[1]); out_tangent[2] = f * (duv2[1] * e1[2] - duv1[1] * e2[2]); } + +void Matrix4x4_CreateFromVectors(matrix4x4 out, const vec3_t right, const vec3_t up, const vec3_t z, const vec3_t translate) { + out[0][0] = right[0]; + out[1][0] = right[1]; + out[2][0] = right[2]; + out[3][0] = 0; + + out[0][1] = up[0]; + out[1][1] = up[1]; + out[2][1] = up[2]; + out[3][1] = 0; + + out[0][2] = z[0]; + out[1][2] = z[1]; + out[2][2] = z[2]; + out[3][2] = 0; + + out[0][3] = translate[0]; + out[1][3] = translate[1]; + out[2][3] = translate[2]; + out[3][3] = 1; +} diff --git a/ref/vk/vk_math.h b/ref/vk/vk_math.h index 55e19ec2..d3de5054 100644 --- a/ref/vk/vk_math.h +++ b/ref/vk/vk_math.h @@ -22,3 +22,5 @@ void Matrix4x4_CreateOrtho(matrix4x4 m, float xLeft, float xRight, float yBottom void Matrix4x4_CreateModelview( matrix4x4 out ); void computeTangent(vec3_t out_tangent, const vec3_t v0, const vec3_t v1, const vec3_t v2, const vec2_t uv0, const vec2_t uv1, const vec2_t uv2); + +void Matrix4x4_CreateFromVectors(matrix4x4 out, const vec3_t right, const vec3_t up, const vec3_t z, const vec3_t translate); diff --git a/ref/vk/vk_meatpipe.c b/ref/vk/vk_meatpipe.c index 022e48fa..7392b468 100644 --- a/ref/vk/vk_meatpipe.c +++ b/ref/vk/vk_meatpipe.c @@ -5,6 +5,9 @@ #include "ray_pass.h" #include "vk_common.h" +#include "vk_logs.h" + +#define LOG_MODULE LogModule_Meatpipe #define MIN(a,b) ((a)<(b)?(a):(b)) @@ -47,13 +50,13 @@ const void* curReadPtr(cursor_t *cur, int size) { #define CUR_ERROR(errmsg, ...) 
\ if (ctx->cur.error) { \ - gEngine.Con_Printf(S_ERROR "(off=%d left=%d) " errmsg "\n", ctx->cur.off, (ctx->cur.size - ctx->cur.off), ##__VA_ARGS__); \ + ERR("(off=%d left=%d) " errmsg "", ctx->cur.off, (ctx->cur.size - ctx->cur.off), ##__VA_ARGS__); \ goto finalize; \ } #define CUR_ERROR_RETURN(retval, errmsg, ...) \ if (ctx->cur.error) { \ - gEngine.Con_Printf(S_ERROR "(off=%d left=%d) " errmsg "\n", ctx->cur.off, (ctx->cur.size - ctx->cur.off), ##__VA_ARGS__); \ + ERR("(off=%d left=%d) " errmsg "", ctx->cur.off, (ctx->cur.size - ctx->cur.off), ##__VA_ARGS__); \ return retval; \ } @@ -103,7 +106,7 @@ static struct ray_pass_s *pipelineLoadCompute(load_context_t *ctx, int i, const const uint32_t shader_comp = READ_U32_RETURN(NULL, "Couldn't read comp shader for %d %s", i, name); if (shader_comp >= ctx->shaders_count) { - gEngine.Con_Printf(S_ERROR "Pipeline %s shader index out of bounds %d (count %d)\n", name, shader_comp, ctx->shaders_count); + ERR("Pipeline %s shader index out of bounds %d (count %d)", name, shader_comp, ctx->shaders_count); return NULL; } @@ -171,7 +174,7 @@ static qboolean readBindings(load_context_t *ctx, VkDescriptorSetLayoutBinding * const int count = READ_U32("Coulnd't read bindings count"); if (count > MAX_BINDINGS) { - gEngine.Con_Printf(S_ERROR "Too many binding (%d), max: %d\n", count, MAX_BINDINGS); + ERR("Too many binding (%d), max: %d", count, MAX_BINDINGS); goto finalize; } @@ -183,7 +186,7 @@ static qboolean readBindings(load_context_t *ctx, VkDescriptorSetLayoutBinding * const uint32_t stages = READ_U32("Couldn't read stages for binding %d", i); if (res_index >= ctx->meatpipe.resources_count) { - gEngine.Con_Printf(S_ERROR "Resource %d is out of bound %d for binding %d", res_index, ctx->meatpipe.resources_count, i); + ERR("Resource %d is out of bound %d for binding %d", res_index, ctx->meatpipe.resources_count, i); goto finalize; } @@ -200,7 +203,7 @@ static qboolean readBindings(load_context_t *ctx, VkDescriptorSetLayoutBinding * 
write_from = i; if (!write && write_from >= 0) { - gEngine.Con_Printf(S_ERROR "Unsorted non-write binding found at %d(%s), writable started at %d\n", + ERR("Unsorted non-write binding found at %d(%s), writable started at %d", i, res->name, write_from); goto finalize; } @@ -223,7 +226,7 @@ static qboolean readBindings(load_context_t *ctx, VkDescriptorSetLayoutBinding * if (create) res->flags |= MEATPIPE_RES_CREATE; - gEngine.Con_Reportf("Binding %d: %s ds=%d b=%d s=%08x res=%d type=%d write=%d\n", + DEBUG("Binding %d: %s ds=%d b=%d s=%08x res=%d type=%d write=%d", i, name, descriptor_set, binding, stages, res_index, res->descriptor_type, write); } @@ -254,10 +257,10 @@ static qboolean readAndCreatePass(load_context_t *ctx, int i) { char name[64]; READ_STR(name, "Couldn't read pipeline %d name", i); - gEngine.Con_Reportf("%d: loading pipeline %s\n", i, name); + DEBUG("%d: loading pipeline %s", i, name); if (!readBindings(ctx, bindings, pass)) { - gEngine.Con_Printf(S_ERROR "Couldn't read bindings for pipeline %s\n", name); + ERR("Couldn't read bindings for pipeline %s", name); return false; } @@ -275,7 +278,7 @@ static qboolean readAndCreatePass(load_context_t *ctx, int i) { pass->pass = pipelineLoadRT(ctx, i, name, &layout); break; default: - gEngine.Con_Printf(S_ERROR "Unexpected pipeline type %d\n", type); + ERR("Unexpected pipeline type %d", type); } if (pass->pass) @@ -307,7 +310,7 @@ static qboolean readResources(load_context_t *ctx) { res->prev_frame_index_plus_1 = READ_U32("Couldn't read resource %d:%s previous frame index", i, res->name); } - gEngine.Con_Reportf("Resource %d:%s = %08x is_image=%d image_format=%08x count=%d\n", + DEBUG("Resource %d:%s = %08x is_image=%d image_format=%08x count=%d", i, res->name, res->descriptor_type, is_image, res->image_format, res->count); } @@ -329,11 +332,11 @@ static qboolean readAndLoadShaders(load_context_t *ctx) { const void *src = READ_PTR(size, "Couldn't read shader %s data", name); if (VK_NULL_HANDLE == 
(ctx->shaders[i] = R_VkShaderLoadFromMem(src, size, name))) { - gEngine.Con_Printf(S_ERROR "Failed to load shader %d:%s\n", i, name); + ERR("Failed to load shader %d:%s", i, name); goto finalize; } - gEngine.Con_Reportf("%d: Shader loaded %s\n", i, name); + DEBUG("%d: Shader loaded %s", i, name); } return true; @@ -347,7 +350,7 @@ vk_meatpipe_t* R_VkMeatpipeCreateFromFile(const char *filename) { byte* const buf = gEngine.fsapi->LoadFile(filename, &size, false); if (!buf) { - gEngine.Con_Printf(S_ERROR "Couldn't read \"%s\"\n", filename); + ERR("Couldn't read \"%s\"", filename); return NULL; } @@ -363,7 +366,7 @@ vk_meatpipe_t* R_VkMeatpipeCreateFromFile(const char *filename) { const uint32_t magic = READ_U32("Couldn't read magic"); if (magic != k_meatpipe_magic) { - gEngine.Con_Printf(S_ERROR "Meatpipe magic invalid for \"%s\": got %08x expected %08x\n", filename, magic, k_meatpipe_magic); + ERR("Meatpipe magic invalid for \"%s\": got %08x expected %08x", filename, magic, k_meatpipe_magic); goto finalize; } } @@ -391,6 +394,8 @@ vk_meatpipe_t* R_VkMeatpipeCreateFromFile(const char *filename) { memcpy(ret, &ctx->meatpipe, sizeof(*ret)); ctx->meatpipe.resources = NULL; + INFO("Loaded meatpipe %s with %d passes and %d resources", filename, ret->passes_count, ret->resources_count); + finalize: for (int i = 0; i < ctx->shaders_count; ++i) { if (ctx->shaders[i] == VK_NULL_HANDLE) diff --git a/ref/vk/vk_previous_frame.c b/ref/vk/vk_previous_frame.c deleted file mode 100644 index f6c87e58..00000000 --- a/ref/vk/vk_previous_frame.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "vk_studio.h" -#include "vk_common.h" -#include "vk_textures.h" -#include "vk_render.h" -#include "vk_geometry.h" -#include "camera.h" - -#include "xash3d_mathlib.h" -#include "const.h" -#include "r_studioint.h" -#include "triangleapi.h" -#include "studio.h" -#include "pm_local.h" -#include "pmtrace.h" -#include "protocol.h" -#include "enginefeatures.h" -#include "pm_movevars.h" -#include "xash3d_types.h" 
- -#include -#include - -#define PREV_STATES_COUNT 1024 -#define PREV_FRAMES_COUNT 2 - -typedef struct { - matrix3x4 bones_worldtransform[MAXSTUDIOBONES]; - matrix4x4 model_transform; - float time; - uint bones_frame_updated; - uint frame_updated; -} prev_state_t; - -typedef struct { - prev_state_t prev_states[PREV_FRAMES_COUNT][PREV_STATES_COUNT]; - uint frame_index; - uint prev_frame_index; - uint current_frame_id; - uint previous_frame_id; -} prev_states_storage_t; - -prev_states_storage_t g_prev = { 0 }; - -prev_state_t* prevStateInArrayBounds( int frame_storage_id, int entity_id ) -{ - int clamped_entity_id = entity_id; - - if (entity_id >= PREV_STATES_COUNT) - { - gEngine.Con_Printf("Previous frame states data for entity %d overflows storage (size is %d). Increase it\n", entity_id, PREV_STATES_COUNT); - clamped_entity_id = PREV_STATES_COUNT - 1; // fallback to last correct value - } - else if (entity_id < 0) - { - clamped_entity_id = 0; // fallback to correct value - } - - return &g_prev.prev_states[frame_storage_id][clamped_entity_id]; -} - -#define PREV_FRAME(entity_id) prevStateInArrayBounds( g_prev.previous_frame_id, (entity_id) ) -#define CURRENT_FRAME(entity_id) prevStateInArrayBounds( g_prev.current_frame_id, (entity_id) ) - -void R_PrevFrame_StartFrame( void ) -{ - g_prev.frame_index++; - g_prev.current_frame_id = g_prev.frame_index % PREV_FRAMES_COUNT; - g_prev.previous_frame_id = (g_prev.frame_index - 1) % PREV_FRAMES_COUNT; -} - -void R_PrevFrame_SaveCurrentBoneTransforms( int entity_id, matrix3x4* bones_transforms, const matrix4x4 rotationmatrix_inv ) -{ - prev_state_t *current_frame = CURRENT_FRAME(entity_id); - - if (current_frame->bones_frame_updated == g_prev.frame_index) - return; // already updated for this entity - - current_frame->bones_frame_updated = g_prev.frame_index; - - for( int i = 0; i < MAXSTUDIOBONES; i++ ) - Matrix3x4_ConcatTransforms( current_frame->bones_worldtransform[i], rotationmatrix_inv, bones_transforms[i] ); -} - -void 
R_PrevFrame_SaveCurrentState( int entity_id, matrix4x4 model_transform ) -{ - prev_state_t* current_frame = CURRENT_FRAME(entity_id); - - if (current_frame->frame_updated == g_prev.frame_index) - return; // already updated for this entity - - Matrix4x4_Copy( current_frame->model_transform, model_transform ); - current_frame->time = gpGlobals->time; - current_frame->frame_updated = g_prev.frame_index; -} - -matrix3x4* R_PrevFrame_BoneTransforms( int entity_id ) -{ - prev_state_t* prev_frame = PREV_FRAME(entity_id); - - // fallback to current frame if previous is outdated - if (prev_frame->bones_frame_updated != g_prev.frame_index - 1) - return CURRENT_FRAME(entity_id)->bones_worldtransform; - - return prev_frame->bones_worldtransform; -} - -void R_PrevFrame_ModelTransform( int entity_id, matrix4x4 model_matrix ) -{ - prev_state_t* prev_frame = PREV_FRAME(entity_id); - - // fallback to current frame if previous is outdated - if (prev_frame->frame_updated != g_prev.frame_index - 1) - prev_frame = CURRENT_FRAME(entity_id); - - Matrix4x4_Copy(model_matrix, prev_frame->model_transform); -} - -float R_PrevFrame_Time( int entity_id ) -{ - prev_state_t* prev_frame = PREV_FRAME(entity_id); - - // fallback to current frame if previous is outdated - if (prev_frame->frame_updated != g_prev.frame_index - 1) - return gpGlobals->time; - - return prev_frame->time; -} diff --git a/ref/vk/vk_previous_frame.h b/ref/vk/vk_previous_frame.h deleted file mode 100644 index 5841dc70..00000000 --- a/ref/vk/vk_previous_frame.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include "vk_common.h" - -void R_PrevFrame_StartFrame(void); -void R_PrevFrame_SaveCurrentBoneTransforms(int entity_id, matrix3x4* bones_transforms, const matrix4x4 rotationmatrix_inv); -void R_PrevFrame_SaveCurrentState(int entity_id, matrix4x4 model_transform); -matrix3x4* R_PrevFrame_BoneTransforms(int entity_id); -void R_PrevFrame_ModelTransform(int entity_id, matrix4x4 model_matrix); -float R_PrevFrame_Time(int 
entity_id); diff --git a/ref/vk/vk_ray_accel.c b/ref/vk/vk_ray_accel.c index c9d0ad7d..1f40a798 100644 --- a/ref/vk/vk_ray_accel.c +++ b/ref/vk/vk_ray_accel.c @@ -7,22 +7,98 @@ #include "vk_combuf.h" #include "vk_staging.h" #include "vk_math.h" +#include "vk_geometry.h" +#include "vk_render.h" +#include "vk_logs.h" #include "xash3d_mathlib.h" -#define MAX_SCRATCH_BUFFER (32*1024*1024) -#define MAX_ACCELS_BUFFER (64*1024*1024) +#define LOG_MODULE LogModule_RT -struct rt_vk_ray_accel_s g_accel = {0}; +#define MODULE_NAME "accel" + +typedef struct rt_blas_s { + const char *debug_name; + rt_blas_usage_e usage; + + VkAccelerationStructureKHR blas; + + int max_geoms; + //uint32_t *max_prim_counts; + int blas_size; +} rt_blas_t; static struct { + // Stores AS built data. Lifetime similar to render buffer: + // - some portion lives for entire map lifetime + // - some portion lives only for a single frame (may have several frames in flight) + // TODO: unify this with render buffer + // Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT + vk_buffer_t accels_buffer; + struct alo_pool_s *accels_buffer_alloc; + + // Temp: lives only during a single frame (may have many in flight) + // Used for building ASes; + // Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT + vk_buffer_t scratch_buffer; + VkDeviceAddress accels_buffer_addr, scratch_buffer_addr; + + // Temp-ish: used for making TLAS, contains addressed to all used BLASes + // Lifetime and nature of usage similar to scratch_buffer + // TODO: unify them + // Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY + vk_buffer_t tlas_geom_buffer; + VkDeviceAddress tlas_geom_buffer_addr; + r_flipping_buffer_t tlas_geom_buffer_alloc; + + // TODO need several TLASes for N frames in flight + VkAccelerationStructureKHR tlas; + + // Per-frame data that is accumulated between RayFrameBegin and End calls struct { - int blas_count; + uint32_t scratch_offset; // for building dynamic blases + } frame; + + struct { + int 
instances_count; int accels_built; } stats; -} g_accel_; +} g_accel; -static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) { +static VkAccelerationStructureBuildSizesInfoKHR getAccelSizes(const VkAccelerationStructureBuildGeometryInfoKHR *build_info, const uint32_t *max_prim_counts) { + VkAccelerationStructureBuildSizesInfoKHR build_size = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR + }; + + vkGetAccelerationStructureBuildSizesKHR( + vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, build_info, max_prim_counts, &build_size); + + return build_size; +} + +static VkAccelerationStructureKHR createAccel(const char *name, VkAccelerationStructureTypeKHR type, uint32_t size) { + const alo_block_t block = aloPoolAllocate(g_accel.accels_buffer_alloc, size, /*TODO why? align=*/256); + + if (block.offset == ALO_ALLOC_FAILED) { + ERR("Failed to allocated %u bytes for blas \"%s\"", size, name); + return VK_NULL_HANDLE; + } + + const VkAccelerationStructureCreateInfoKHR asci = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, + .buffer = g_accel.accels_buffer.buffer, + .offset = block.offset, + .type = type, + .size = size, + }; + + VkAccelerationStructureKHR accel = VK_NULL_HANDLE; + XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, &accel)); + SET_DEBUG_NAME(accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, name); + return accel; +} + +static VkDeviceAddress getAccelAddress(VkAccelerationStructureKHR as) { VkAccelerationStructureDeviceAddressInfoKHR asdai = { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, .accelerationStructure = as, @@ -30,110 +106,100 @@ static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) { return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai); } -// TODO split this into smaller building blocks in a separate module -qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const 
as_build_args_t *args, vk_ray_model_t *model) { - qboolean should_create = *args->p_accel == VK_NULL_HANDLE; -#if 1 // update does not work at all on AMD gpus - qboolean is_update = false; // FIXME this crashes for some reason !should_create && args->dynamic; -#else - qboolean is_update = !should_create && args->dynamic; -#endif - - VkAccelerationStructureBuildGeometryInfoKHR build_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, - .type = args->type, - .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR | ( args->dynamic ? VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR : 0), - .mode = is_update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, - .geometryCount = args->n_geoms, - .pGeometries = args->geoms, - .srcAccelerationStructure = is_update ? *args->p_accel : VK_NULL_HANDLE, - }; - - VkAccelerationStructureBuildSizesInfoKHR build_size = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR - }; - - uint32_t scratch_buffer_size = 0; - - ASSERT(args->geoms); - ASSERT(args->n_geoms > 0); - ASSERT(args->p_accel); - - vkGetAccelerationStructureBuildSizesKHR( - vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info, args->max_prim_counts, &build_size); - - scratch_buffer_size = is_update ? build_size.updateScratchSize : build_size.buildScratchSize; - -#if 0 +static qboolean buildAccel(VkBuffer geometry_buffer, VkAccelerationStructureBuildGeometryInfoKHR *build_info, uint32_t scratch_buffer_size, const VkAccelerationStructureBuildRangeInfoKHR *build_ranges) { + // FIXME this is definitely not the right place. 
We should upload everything in bulk, and only then build blases in bulk too + vk_combuf_t *const combuf = R_VkStagingCommit(); { - uint32_t max_prims = 0; - for (int i = 0; i < args->n_geoms; ++i) - max_prims += args->max_prim_counts[i]; - gEngine.Con_Reportf( - "AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, args->n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize); + const VkBufferMemoryBarrier bmb[] = { { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, // FIXME + .buffer = geometry_buffer, + .offset = 0, // FIXME + .size = VK_WHOLE_SIZE, // FIXME + } }; + vkCmdPipelineBarrier(combuf->cmdbuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, + //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, + 0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL); } -#endif + + //gEngine.Con_Reportf("sratch offset = %d, req=%d", g_accel.frame.scratch_offset, scratch_buffer_size); if (MAX_SCRATCH_BUFFER < g_accel.frame.scratch_offset + scratch_buffer_size) { - gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n", + ERR("Scratch buffer overflow: left %u bytes, but need %u", MAX_SCRATCH_BUFFER - g_accel.frame.scratch_offset, scratch_buffer_size); return false; } + build_info->scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset; + + //uint32_t scratch_offset_initial = g_accel.frame.scratch_offset; + g_accel.frame.scratch_offset += scratch_buffer_size; + g_accel.frame.scratch_offset = ALIGN_UP(g_accel.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment); + + //gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x", *args->p_accel, args->n_geoms, scratch_offset_initial, 
scratch_buffer_size, scratch_offset_initial + scratch_buffer_size); + + g_accel.stats.accels_built++; + + static int scope_id = -2; + if (scope_id == -2) + scope_id = R_VkGpuScope_Register("build_as"); + const int begin_index = R_VkCombufScopeBegin(combuf, scope_id); + const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = build_ranges; + vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, build_info, &p_build_ranges); + R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); + + return true; +} + +// TODO split this into smaller building blocks in a separate module +qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args) { + ASSERT(args->geoms); + ASSERT(args->n_geoms > 0); + ASSERT(args->p_accel); + + const qboolean should_create = *args->p_accel == VK_NULL_HANDLE; + + VkAccelerationStructureBuildGeometryInfoKHR build_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = args->type, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .geometryCount = args->n_geoms, + .pGeometries = args->geoms, + .srcAccelerationStructure = VK_NULL_HANDLE, + }; + + const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, args->max_prim_counts); + if (should_create) { - const uint32_t as_size = build_size.accelerationStructureSize; - const alo_block_t block = aloPoolAllocate(g_accel.accels_buffer_alloc, as_size, /*TODO why? 
align=*/256); - const uint32_t buffer_offset = block.offset; - const VkAccelerationStructureCreateInfoKHR asci = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, - .buffer = g_accel.accels_buffer.buffer, - .offset = buffer_offset, - .type = args->type, - .size = as_size, - }; + *args->p_accel = createAccel(args->debug_name, args->type, build_size.accelerationStructureSize); - if (buffer_offset == ALO_ALLOC_FAILED) { - gEngine.Con_Printf(S_ERROR "Failed to allocated %u bytes for accel buffer\n", (uint32_t)asci.size); + if (!args->p_accel) return false; - } - XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->p_accel)); - SET_DEBUG_NAME(*args->p_accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, args->debug_name); + if (args->out_accel_addr) + *args->out_accel_addr = getAccelAddress(*args->p_accel); - if (model) { - model->size = asci.size; - model->debug.as_offset = buffer_offset; - } + if (args->inout_size) + *args->inout_size = build_size.accelerationStructureSize; - // gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size); + // gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x", *args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size); } // If not enough data for building, just create if (!combuf || !args->build_ranges) return true; - if (model) { - ASSERT(model->size >= build_size.accelerationStructureSize); - } + if (args->inout_size) + ASSERT(*args->inout_size >= build_size.accelerationStructureSize); build_info.dstAccelerationStructure = *args->p_accel; - build_info.scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset; - //uint32_t scratch_offset_initial = g_accel.frame.scratch_offset; - g_accel.frame.scratch_offset += scratch_buffer_size; - g_accel.frame.scratch_offset = ALIGN_UP(g_accel.frame.scratch_offset, 
vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment); - - //gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size); - - g_accel_.stats.accels_built++; - - static int scope_id = -2; - if (scope_id == -2) - scope_id = R_VkGpuScope_Register("build_as"); - const int begin_index = R_VkCombufScopeBegin(combuf, scope_id); - vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, &build_info, &args->build_ranges); - R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); - return true; + const VkBuffer geometry_buffer = R_GeometryBuffer_Get(); + return buildAccel(geometry_buffer, &build_info, build_size.buildScratchSize, args->build_ranges); } static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) { @@ -150,9 +216,9 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) { }, }, }; - const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_ACCELS }; //cmdbuf == VK_NULL_HANDLE ? 
MAX_ACCELS : g_ray_model_state.frame.num_models }; + const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_INSTANCES }; const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = { - .primitiveCount = g_ray_model_state.frame.num_models, + .primitiveCount = g_ray_model_state.frame.instances_count, }; const as_build_args_t asrgs = { .geoms = tl_geom, @@ -164,20 +230,22 @@ static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) { .dynamic = false, .p_accel = &g_accel.tlas, .debug_name = "TLAS", + .out_accel_addr = NULL, + .inout_size = NULL, }; - if (!createOrUpdateAccelerationStructure(combuf, &asrgs, NULL)) { + if (!createOrUpdateAccelerationStructure(combuf, &asrgs)) { gEngine.Host_Error("Could not create/update TLAS\n"); return; } } -void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { - ASSERT(g_ray_model_state.frame.num_models > 0); +vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { + ASSERT(g_ray_model_state.frame.instances_count > 0); DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas"); R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc ); - const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.num_models, 1); + const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.instances_count, 1); ASSERT(instance_offset != ALO_ALLOC_FAILED); // Upload all blas instances references to GPU mem @@ -185,23 +253,22 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { const vk_staging_region_t headers_lock = R_VkStagingLockForBuffer((vk_staging_buffer_args_t){ .buffer = g_ray_model_state.model_headers_buffer.buffer, .offset = 0, - .size = g_ray_model_state.frame.num_models * sizeof(struct ModelHeader), + .size = g_ray_model_state.frame.instances_count * sizeof(struct ModelHeader), .alignment = 16, }); ASSERT(headers_lock.ptr); VkAccelerationStructureInstanceKHR* inst = 
((VkAccelerationStructureInstanceKHR*)g_accel.tlas_geom_buffer.mapped) + instance_offset; - for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) { - const vk_ray_draw_model_t* const model = g_ray_model_state.frame.models + i; - ASSERT(model->model); - ASSERT(model->model->as != VK_NULL_HANDLE); + for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) { + const rt_draw_instance_t* const instance = g_ray_model_state.frame.instances + i; + ASSERT(instance->blas_addr != 0); inst[i] = (VkAccelerationStructureInstanceKHR){ - .instanceCustomIndex = model->model->kusochki_offset, + .instanceCustomIndex = instance->kusochki_offset, .instanceShaderBindingTableRecordOffset = 0, - .accelerationStructureReference = getASAddress(model->model->as), // TODO cache this addr + .accelerationStructureReference = instance->blas_addr, }; - switch (model->material_mode) { + switch (instance->material_mode) { case MATERIAL_MODE_OPAQUE: inst[i].mask = GEOMETRY_BIT_OPAQUE; inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_REGULAR, @@ -225,21 +292,21 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR; break; default: - gEngine.Host_Error("Unexpected material mode %d\n", model->material_mode); + gEngine.Host_Error("Unexpected material mode %d\n", instance->material_mode); break; } - memcpy(&inst[i].transform, model->transform_row, sizeof(VkTransformMatrixKHR)); + memcpy(&inst[i].transform, instance->transform_row, sizeof(VkTransformMatrixKHR)); struct ModelHeader *const header = ((struct ModelHeader*)headers_lock.ptr) + i; - header->mode = model->material_mode; - Vector4Copy(model->model->color, header->color); - Matrix4x4_ToArrayFloatGL(model->model->prev_transform, (float*)header->prev_transform); + header->mode = instance->material_mode; + Vector4Copy(instance->color, header->color); + Matrix4x4_ToArrayFloatGL(instance->prev_transform_row, (float*)header->prev_transform); } 
R_VkStagingUnlock(headers_lock.handle); } - g_accel_.stats.blas_count = g_ray_model_state.frame.num_models; + g_accel.stats.instances_count = g_ray_model_state.frame.instances_count; // Barrier for building all BLASes // BLAS building is now in cmdbuf, need to synchronize with results @@ -250,7 +317,7 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, .buffer = g_accel.accels_buffer.buffer, .offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR), - .size = g_ray_model_state.frame.num_models * sizeof(VkAccelerationStructureInstanceKHR), + .size = g_ray_model_state.frame.instances_count * sizeof(VkAccelerationStructureInstanceKHR), }}; vkCmdPipelineBarrier(combuf->cmdbuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, @@ -261,6 +328,34 @@ void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { // 2. Build TLAS createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR)); DEBUG_END(combuf->cmdbuf); + + // TODO return vk_resource_t with callback to all this "do the preparation and barriers" crap, instead of doing it here + { + const VkBufferMemoryBarrier bmb[] = { { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .buffer = g_accel.accels_buffer.buffer, + .offset = 0, + .size = VK_WHOLE_SIZE, + } }; + vkCmdPipelineBarrier(combuf->cmdbuf, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL); + } + + return (vk_resource_t){ + .type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, + .value = (vk_descriptor_value_t){ + .accel = (VkWriteDescriptorSetAccelerationStructureKHR) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, + .accelerationStructureCount = 1, + 
.pAccelerationStructures = &g_accel.tlas, + .pNext = NULL, + }, + }, + }; } qboolean RT_VkAccelInit(void) { @@ -282,7 +377,7 @@ qboolean RT_VkAccelInit(void) { g_accel.scratch_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.scratch_buffer.buffer); // TODO this doesn't really need to be host visible, use staging - if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_ACCELS * 2, + if (!VK_BufferCreate("ray tlas_geom_buffer", &g_accel.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_INSTANCES * 2, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { @@ -290,10 +385,12 @@ qboolean RT_VkAccelInit(void) { return false; } g_accel.tlas_geom_buffer_addr = R_VkBufferGetDeviceAddress(g_accel.tlas_geom_buffer.buffer); - R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_ACCELS * 2); + R_FlippingBuffer_Init(&g_accel.tlas_geom_buffer_alloc, MAX_INSTANCES * 2); - R_SpeedsRegisterMetric(&g_accel_.stats.blas_count, "blas_count", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_accel_.stats.accels_built, "accels_built", kSpeedsMetricCount); + g_accel.accels_buffer_alloc = aloPoolCreate(MAX_ACCELS_BUFFER, MAX_INSTANCES, /* why */ 256); + + R_SPEEDS_COUNTER(g_accel.stats.instances_count, "instances", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_accel.stats.accels_built, "built", kSpeedsMetricCount); return true; } @@ -302,13 +399,6 @@ void RT_VkAccelShutdown(void) { if (g_accel.tlas != VK_NULL_HANDLE) vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas, NULL); - for (int i = 0; i < COUNTOF(g_ray_model_state.models_cache); ++i) { - vk_ray_model_t *model = g_ray_model_state.models_cache + i; - if (model->as != VK_NULL_HANDLE) - 
vkDestroyAccelerationStructureKHR(vk_core.device, model->as, NULL); - model->as = VK_NULL_HANDLE; - } - VK_BufferDestroy(&g_accel.scratch_buffer); VK_BufferDestroy(&g_accel.accels_buffer); VK_BufferDestroy(&g_accel.tlas_geom_buffer); @@ -327,12 +417,6 @@ void RT_VkAccelNewMap(void) { aloPoolDestroy(g_accel.accels_buffer_alloc); g_accel.accels_buffer_alloc = aloPoolCreate(MAX_ACCELS_BUFFER, expected_accels, accels_alignment); - // Clear model cache - for (int i = 0; i < COUNTOF(g_ray_model_state.models_cache); ++i) { - vk_ray_model_t *model = g_ray_model_state.models_cache + i; - VK_RayModelDestroy(model); - } - // Recreate tlas // Why here and not in init: to make sure that its memory is preserved. Map init will clear all memory regions. { @@ -348,3 +432,211 @@ void RT_VkAccelNewMap(void) { void RT_VkAccelFrameBegin(void) { g_accel.frame.scratch_offset = 0; } + +struct rt_blas_s* RT_BlasCreate(const char *name, rt_blas_usage_e usage) { + rt_blas_t *blas = Mem_Calloc(vk_core.pool, sizeof(*blas)); + + blas->debug_name = name; + blas->usage = usage; + blas->blas_size = -1; + + return blas; +} + +qboolean RT_BlasPreallocate(struct rt_blas_s* blas, rt_blas_preallocate_t args) { + ASSERT(!blas->blas); + ASSERT(blas->usage == kBlasBuildDynamicFast); + + // TODO allocate these from static pool + VkAccelerationStructureGeometryKHR *const as_geoms = Mem_Calloc(vk_core.pool, args.max_geometries * sizeof(*as_geoms)); + uint32_t *const max_prim_counts = Mem_Malloc(vk_core.pool, args.max_geometries * sizeof(*max_prim_counts)); + VkAccelerationStructureBuildRangeInfoKHR *const build_ranges = Mem_Calloc(vk_core.pool, args.max_geometries * sizeof(*build_ranges)); + + for (int i = 0; i < args.max_geometries; ++i) { + max_prim_counts[i] = args.max_prims_per_geometry; + as_geoms[i] = (VkAccelerationStructureGeometryKHR) + { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. 
incoming mode might have transparency eventually (and also dynamically) + .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, + .geometry.triangles = + (VkAccelerationStructureGeometryTrianglesDataKHR){ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, + .indexType = VK_INDEX_TYPE_UINT16, + .maxVertex = args.max_vertex_per_geometry, + .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, + .vertexStride = sizeof(vk_vertex_t), + .vertexData.deviceAddress = 0, + .indexData.deviceAddress = 0, + }, + }; + + build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) { + .primitiveCount = args.max_prims_per_geometry, + .primitiveOffset = 0, + .firstVertex = 0, + }; + } + + VkAccelerationStructureBuildGeometryInfoKHR build_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR, + .geometryCount = args.max_geometries, + .srcAccelerationStructure = VK_NULL_HANDLE, + .pGeometries = as_geoms, + }; + + const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, max_prim_counts); + DEBUG("geoms=%d max_prims=%d max_vertex=%d => blas=%dKiB", + args.max_geometries, args.max_prims_per_geometry, args.max_vertex_per_geometry, (int)build_size.accelerationStructureSize / 1024); + + qboolean retval = false; + + blas->blas = createAccel(blas->debug_name, VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, build_size.accelerationStructureSize); + if (!blas->blas) { + ERR("Couldn't preallocate blas %s", blas->debug_name); + goto finalize; + } + + retval = true; + + blas->blas_size = build_size.accelerationStructureSize; + blas->max_geoms = build_info.geometryCount; + +finalize: + Mem_Free(as_geoms); + Mem_Free(max_prim_counts); + Mem_Free(build_ranges); + return retval; +} + +void RT_BlasDestroy(struct rt_blas_s* blas) { + if (!blas) 
+ return; + + /* if (blas->max_prims) */ + /* Mem_Free(blas->max_prims); */ + + if (blas->blas) + vkDestroyAccelerationStructureKHR(vk_core.device, blas->blas, NULL); + + Mem_Free(blas); +} + +VkDeviceAddress RT_BlasGetDeviceAddress(struct rt_blas_s *blas) { + return getAccelAddress(blas->blas); +} + +qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count) { + if (!blas || !geoms_count) + return false; + + VkAccelerationStructureBuildGeometryInfoKHR build_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, + .geometryCount = geoms_count, + .srcAccelerationStructure = VK_NULL_HANDLE, + }; + + qboolean is_update = false; + + switch (blas->usage) { + case kBlasBuildStatic: + ASSERT(!blas->blas); + break; + case kBlasBuildDynamicUpdate: + if (blas->blas) { + build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR; + build_info.srcAccelerationStructure = blas->blas; + is_update = true; + } + build_info.flags |= VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR; + break; + case kBlasBuildDynamicFast: + build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR; + break; + } + + const VkBuffer geometry_buffer = R_GeometryBuffer_Get(); + const VkDeviceAddress buffer_addr = R_VkBufferGetDeviceAddress(geometry_buffer); + + // TODO allocate these from static pool + VkAccelerationStructureGeometryKHR *const as_geoms = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*as_geoms)); + uint32_t *const max_prim_counts = Mem_Malloc(vk_core.pool, geoms_count * sizeof(*max_prim_counts)); + VkAccelerationStructureBuildRangeInfoKHR *const build_ranges = Mem_Calloc(vk_core.pool, geoms_count * sizeof(*build_ranges)); + + for (int i = 0; i < geoms_count; ++i) { + const vk_render_geometry_t *mg = 
geoms + i; + const uint32_t prim_count = mg->element_count / 3; + + max_prim_counts[i] = prim_count; + as_geoms[i] = (VkAccelerationStructureGeometryKHR) + { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. incoming mode might have transparency eventually (and also dynamically) + .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, + .geometry.triangles = + (VkAccelerationStructureGeometryTrianglesDataKHR){ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, + .indexType = VK_INDEX_TYPE_UINT16, + .maxVertex = mg->max_vertex, + .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, + .vertexStride = sizeof(vk_vertex_t), + .vertexData.deviceAddress = buffer_addr, + .indexData.deviceAddress = buffer_addr, + }, + }; + + build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) { + .primitiveCount = prim_count, + .primitiveOffset = mg->index_offset * sizeof(uint16_t), + .firstVertex = mg->vertex_offset, + }; + } + + build_info.pGeometries = as_geoms; + + const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, max_prim_counts); + + qboolean retval = false; + + // allocate blas + if (!blas->blas) { + blas->blas = createAccel(blas->debug_name, VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, build_size.accelerationStructureSize); + if (!blas->blas) { + ERR("Couldn't create vk accel"); + goto finalize; + } + + blas->blas_size = build_size.accelerationStructureSize; + blas->max_geoms = build_info.geometryCount; + } else { + if (blas->blas_size < build_size.accelerationStructureSize) { + ERR("Fast dynamic BLAS %s size exceeded (need %dKiB, have %dKiB, geoms = %d)", blas->debug_name, + (int)build_size.accelerationStructureSize / 1024, + blas->blas_size / 1024, + geoms_count + ); + goto finalize; + } + } + + // Build + build_info.dstAccelerationStructure = blas->blas; + if (!buildAccel(geometry_buffer, &build_info, is_update ? 
build_size.updateScratchSize : build_size.buildScratchSize, build_ranges)) { + ERR("Couldn't build BLAS %s", blas->debug_name); + goto finalize; + } + + retval = true; + +finalize: + Mem_Free(as_geoms); + Mem_Free(max_prim_counts); + Mem_Free(build_ranges); + return retval; +} diff --git a/ref/vk/vk_ray_accel.h b/ref/vk/vk_ray_accel.h index 0bb468c9..dbaa3fa6 100644 --- a/ref/vk/vk_ray_accel.h +++ b/ref/vk/vk_ray_accel.h @@ -2,44 +2,14 @@ #include "vk_core.h" #include "vk_buffer.h" - -struct rt_vk_ray_accel_s { - // Stores AS built data. Lifetime similar to render buffer: - // - some portion lives for entire map lifetime - // - some portion lives only for a single frame (may have several frames in flight) - // TODO: unify this with render buffer - // Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT - vk_buffer_t accels_buffer; - struct alo_pool_s *accels_buffer_alloc; - - // Temp: lives only during a single frame (may have many in flight) - // Used for building ASes; - // Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT - vk_buffer_t scratch_buffer; - VkDeviceAddress accels_buffer_addr, scratch_buffer_addr; - - // Temp-ish: used for making TLAS, contains addressed to all used BLASes - // Lifetime and nature of usage similar to scratch_buffer - // TODO: unify them - // Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY - vk_buffer_t tlas_geom_buffer; - VkDeviceAddress tlas_geom_buffer_addr; - r_flipping_buffer_t tlas_geom_buffer_alloc; - - // TODO need several TLASes for N frames in flight - VkAccelerationStructureKHR tlas; - - // Per-frame data that is accumulated between RayFrameBegin and End calls - struct { - uint32_t scratch_offset; // for building dynamic blases - } frame; -}; - -extern struct rt_vk_ray_accel_s g_accel; +#include "vk_math.h" +#include "ray_resources.h" qboolean RT_VkAccelInit(void); void RT_VkAccelShutdown(void); + void RT_VkAccelNewMap(void); void RT_VkAccelFrameBegin(void); + struct vk_combuf_s; -void 
RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf); +vk_resource_t RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf); diff --git a/ref/vk/vk_ray_internal.h b/ref/vk/vk_ray_internal.h index f1054bad..33a48b06 100644 --- a/ref/vk/vk_ray_internal.h +++ b/ref/vk/vk_ray_internal.h @@ -3,40 +3,24 @@ #include "vk_core.h" #include "vk_buffer.h" #include "vk_const.h" +#include "vk_rtx.h" -#define MAX_ACCELS 2048 +#define MAX_INSTANCES 2048 #define MAX_KUSOCHKI 32768 #define MODEL_CACHE_SIZE 2048 #include "shaders/ray_interop.h" -typedef struct vk_ray_model_s { - VkAccelerationStructureKHR as; - VkAccelerationStructureGeometryKHR *geoms; - int max_prims; - int num_geoms; - int size; - uint32_t kusochki_offset; - qboolean dynamic; - qboolean taken; - - // TODO remove with the split of Kusok in Model+Material+Kusok - uint32_t material_mode; - vec4_t color; - matrix4x4 prev_transform; - - struct { - uint32_t as_offset; - } debug; -} vk_ray_model_t; - typedef struct Kusok vk_kusok_data_t; -typedef struct { +typedef struct rt_draw_instance_s { + VkDeviceAddress blas_addr; + uint32_t kusochki_offset; matrix3x4 transform_row; - vk_ray_model_t *model; + matrix4x4 prev_transform_row; + vec4_t color; uint32_t material_mode; // MATERIAL_MODE_ from ray_interop.h -} vk_ray_draw_model_t; +} rt_draw_instance_t; typedef struct { const char *debug_name; @@ -47,18 +31,25 @@ typedef struct { uint32_t n_geoms; VkAccelerationStructureTypeKHR type; qboolean dynamic; + + VkDeviceAddress *out_accel_addr; + uint32_t *inout_size; } as_build_args_t; struct vk_combuf_s; -qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args, vk_ray_model_t *model); +qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args); + +#define MAX_SCRATCH_BUFFER (32*1024*1024) +#define MAX_ACCELS_BUFFER (64*1024*1024) typedef struct { // Geometry metadata. Lifetime is similar to geometry lifetime itself. 
// Semantically close to render buffer (describes layout for those objects) - // TODO unify with render buffer + // TODO unify with render buffer? // Needs: STORAGE_BUFFER vk_buffer_t kusochki_buffer; r_debuffer_t kusochki_alloc; + // TODO when fully rt_model: r_blocks_t alloc; // Model header // Array of struct ModelHeader: color, material_mode, prev_transform @@ -66,13 +57,11 @@ typedef struct { // Per-frame data that is accumulated between RayFrameBegin and End calls struct { - int num_models; - int num_lighttextures; - vk_ray_draw_model_t models[MAX_ACCELS]; + rt_draw_instance_t instances[MAX_INSTANCES]; + int instances_count; + uint32_t scratch_offset; // for building dynamic blases } frame; - - vk_ray_model_t models_cache[MODEL_CACHE_SIZE]; } xvk_ray_model_state_t; extern xvk_ray_model_state_t g_ray_model_state; @@ -81,3 +70,41 @@ void XVK_RayModel_ClearForNextFrame( void ); void XVK_RayModel_Validate(void); void RT_RayModel_Clear(void); + +// Just creates an empty BLAS structure, doesn't alloc anything +// Memory pointed to by name must remain alive until RT_BlasDestroy +struct rt_blas_s* RT_BlasCreate(const char *name, rt_blas_usage_e usage); + +// Preallocate BLAS with given estimates +typedef struct { + int max_geometries; + int max_prims_per_geometry; + int max_vertex_per_geometry; +} rt_blas_preallocate_t; +qboolean RT_BlasPreallocate(struct rt_blas_s* blas, rt_blas_preallocate_t args); + +void RT_BlasDestroy(struct rt_blas_s* blas); + +// 1. Schedules BLAS build (allocates geoms+ranges from a temp pool, etc). +// 2. 
Allocates kusochki (if not) and fills them with geom and initial material data +qboolean RT_BlasBuild(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count); + +VkDeviceAddress RT_BlasGetDeviceAddress(struct rt_blas_s *blas); + +typedef struct rt_kusochki_s { + uint32_t offset; + int count; + int internal_index__; +} rt_kusochki_t; + +rt_kusochki_t RT_KusochkiAllocLong(int count); +uint32_t RT_KusochkiAllocOnce(int count); +void RT_KusochkiFree(const rt_kusochki_t*); + +struct vk_render_geometry_s; +qboolean RT_KusochkiUpload(uint32_t kusochki_offset, const struct vk_render_geometry_s *geoms, int geoms_count, int override_texture_id, const vec4_t *override_color); + +qboolean RT_DynamicModelInit(void); +void RT_DynamicModelShutdown(void); + +void RT_DynamicModelProcessFrame(void); diff --git a/ref/vk/vk_ray_model.c b/ref/vk/vk_ray_model.c index c34361bd..046c4c32 100644 --- a/ref/vk/vk_ray_model.c +++ b/ref/vk/vk_ray_model.c @@ -17,153 +17,21 @@ xvk_ray_model_state_t g_ray_model_state; -static void returnModelToCache(vk_ray_model_t *model) { - ASSERT(model->taken); - model->taken = false; -} +typedef struct rt_model_s { + struct rt_blas_s *blas; + VkDeviceAddress blas_addr; + rt_kusochki_t kusochki; +} rt_model_t; -static vk_ray_model_t *getModelFromCache(int num_geoms, int max_prims, const VkAccelerationStructureGeometryKHR *geoms) { //}, int size) { - vk_ray_model_t *model = NULL; - int i; - for (i = 0; i < ARRAYSIZE(g_ray_model_state.models_cache); ++i) - { - int j; - model = g_ray_model_state.models_cache + i; - if (model->taken) - continue; - - if (!model->as) - break; - - if (model->num_geoms != num_geoms) - continue; - - if (model->max_prims != max_prims) - continue; - - for (j = 0; j < num_geoms; ++j) { - if (model->geoms[j].geometryType != geoms[j].geometryType) - break; - - if (model->geoms[j].flags != geoms[j].flags) - break; - - if (geoms[j].geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR) { - // TODO what else should we 
compare? - if (model->geoms[j].geometry.triangles.maxVertex != geoms[j].geometry.triangles.maxVertex) - break; - - ASSERT(model->geoms[j].geometry.triangles.vertexStride == geoms[j].geometry.triangles.vertexStride); - ASSERT(model->geoms[j].geometry.triangles.vertexFormat == geoms[j].geometry.triangles.vertexFormat); - ASSERT(model->geoms[j].geometry.triangles.indexType == geoms[j].geometry.triangles.indexType); - } else { - PRINT_NOT_IMPLEMENTED_ARGS("Non-tri geometries are not implemented"); - break; - } - } - - if (j == num_geoms) - break; - } - - if (i == ARRAYSIZE(g_ray_model_state.models_cache)) - return NULL; - - // if (model->size > 0) - // ASSERT(model->size >= size); - - if (!model->geoms) { - const size_t size = sizeof(*geoms) * num_geoms; - model->geoms = Mem_Malloc(vk_core.pool, size); - memcpy(model->geoms, geoms, size); - model->num_geoms = num_geoms; - model->max_prims = max_prims; - } - - model->taken = true; - return model; -} - -static void assertNoOverlap( uint32_t o1, uint32_t s1, uint32_t o2, uint32_t s2 ) { - uint32_t min_offset, min_size; - uint32_t max_offset; - - if (o1 < o2) { - min_offset = o1; - min_size = s1; - max_offset = o2; - } else { - min_offset = o2; - min_size = s2; - max_offset = o1; - } - - ASSERT(min_offset + min_size <= max_offset); -} - -static void validateModelPair( const vk_ray_model_t *m1, const vk_ray_model_t *m2 ) { - if (m1 == m2) return; - if (!m2->num_geoms) return; - assertNoOverlap(m1->debug.as_offset, m1->size, m2->debug.as_offset, m2->size); - if (m1->taken && m2->taken) - assertNoOverlap(m1->kusochki_offset, m1->num_geoms, m2->kusochki_offset, m2->num_geoms); -} - -static void validateModel( const vk_ray_model_t *model ) { - for (int j = 0; j < ARRAYSIZE(g_ray_model_state.models_cache); ++j) { - validateModelPair(model, g_ray_model_state.models_cache + j); - } -} - -static void validateModels( void ) { - for (int i = 0; i < ARRAYSIZE(g_ray_model_state.models_cache); ++i) { - 
validateModel(g_ray_model_state.models_cache + i); - } -} - -void XVK_RayModel_Validate( void ) { - const vk_kusok_data_t* kusochki = g_ray_model_state.kusochki_buffer.mapped; - ASSERT(g_ray_model_state.frame.num_models <= ARRAYSIZE(g_ray_model_state.frame.models)); - for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) { - const vk_ray_draw_model_t *draw_model = g_ray_model_state.frame.models + i; - const vk_ray_model_t *model = draw_model->model; - int num_geoms = 1; // TODO can't validate non-dynamic models because this info is lost - ASSERT(model); - ASSERT(model->as != VK_NULL_HANDLE); - ASSERT(model->kusochki_offset < MAX_KUSOCHKI); - ASSERT(model->geoms); - ASSERT(model->num_geoms > 0); - ASSERT(model->taken); - num_geoms = model->num_geoms; - - for (int j = 0; j < num_geoms; j++) { - const vk_kusok_data_t *kusok = kusochki + j; - const vk_texture_t *tex = findTexture(kusok->material.tex_base_color); - ASSERT(tex); - ASSERT(tex->vk.image.view != VK_NULL_HANDLE); - - // uint32_t index_offset; - // uint32_t vertex_offset; - // uint32_t triangles; - } - - // Check for as model memory aliasing - for (int j = 0; j < g_ray_model_state.frame.num_models; ++j) { - const vk_ray_model_t *model2 = g_ray_model_state.frame.models[j].model; - validateModelPair(model, model2); - } - } -} - -static void applyMaterialToKusok(vk_kusok_data_t* kusok, const vk_render_geometry_t *geom) { - const xvk_material_t *const mat = XVK_GetMaterialForTextureIndex( geom->texture ); +static void applyMaterialToKusok(vk_kusok_data_t* kusok, const vk_render_geometry_t *geom, int override_texture_id, const vec4_t override_color) { + const int tex_id = override_texture_id > 0 ? 
override_texture_id : geom->texture; + const xvk_material_t *const mat = XVK_GetMaterialForTextureIndex( tex_id ); ASSERT(mat); // TODO split kusochki into static geometry data and potentially dynamic material data // This data is static, should never change kusok->vertex_offset = geom->vertex_offset; kusok->index_offset = geom->index_offset; - kusok->triangles = geom->element_count / 3; // Material data itself is mostly static. Except for animated textures, which just get a new material slot for each frame. kusok->material = (struct Material){ @@ -181,6 +49,13 @@ static void applyMaterialToKusok(vk_kusok_data_t* kusok, const vk_render_geometr VectorCopy(geom->emissive, kusok->emissive); Vector4Copy(mat->base_color, kusok->material.base_color); + if (override_color) { + kusok->material.base_color[0] *= override_color[0]; + kusok->material.base_color[1] *= override_color[1]; + kusok->material.base_color[2] *= override_color[2]; + kusok->material.base_color[3] *= override_color[3]; + } + // TODO should be patched by the Chrome material source itself to generate a static chrome material const qboolean HACK_chrome = geom->material == kXVkMaterialChrome; if (!mat->set && HACK_chrome) @@ -191,168 +66,9 @@ static void applyMaterialToKusok(vk_kusok_data_t* kusok, const vk_render_geometr kusok->material.tex_base_color = TEX_BASE_SKYBOX; } -vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) { - VkAccelerationStructureGeometryKHR *geoms; - uint32_t *geom_max_prim_counts; - VkAccelerationStructureBuildRangeInfoKHR *geom_build_ranges; - const VkDeviceAddress buffer_addr = R_VkBufferGetDeviceAddress(args.buffer); // TODO pass in args/have in buffer itself - const uint32_t kusochki_count_offset = R_DEBuffer_Alloc(&g_ray_model_state.kusochki_alloc, args.model->dynamic ? 
LifetimeDynamic : LifetimeStatic, args.model->num_geometries, 1); - vk_ray_model_t *ray_model; - int max_prims = 0; - - ASSERT(vk_core.rtx); - - if (kusochki_count_offset == ALO_ALLOC_FAILED) { - gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded on model %s\n", args.model->debug_name); - return NULL; - } - - // FIXME don't touch allocator each frame many times pls - geoms = Mem_Calloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms)); - geom_max_prim_counts = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_max_prim_counts)); - geom_build_ranges = Mem_Calloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges)); - - /* gEngine.Con_Reportf("Loading model %s, geoms: %d\n", args.model->debug_name, args.model->num_geometries); */ - - for (int i = 0; i < args.model->num_geometries; ++i) { - vk_render_geometry_t *mg = args.model->geometries + i; - const uint32_t prim_count = mg->element_count / 3; - - max_prims += prim_count; - geom_max_prim_counts[i] = prim_count; - geoms[i] = (VkAccelerationStructureGeometryKHR) - { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, - .flags = VK_GEOMETRY_OPAQUE_BIT_KHR, // FIXME this is not true. 
incoming mode might have transparency eventually (and also dynamically) - .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, - .geometry.triangles = - (VkAccelerationStructureGeometryTrianglesDataKHR){ - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, - .indexType = VK_INDEX_TYPE_UINT16, - .maxVertex = mg->max_vertex, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .vertexStride = sizeof(vk_vertex_t), - .vertexData.deviceAddress = buffer_addr, - .indexData.deviceAddress = buffer_addr, - }, - }; - - geom_build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) { - .primitiveCount = prim_count, - .primitiveOffset = mg->index_offset * sizeof(uint16_t), - .firstVertex = mg->vertex_offset, - }; - } - - // FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too - vk_combuf_t *const combuf = R_VkStagingCommit(); - { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - //.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, // FIXME - .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, // FIXME - .buffer = args.buffer, - .offset = 0, // FIXME - .size = VK_WHOLE_SIZE, // FIXME - } }; - vkCmdPipelineBarrier(combuf->cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, - 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); - } - - { - as_build_args_t asrgs = { - .geoms = geoms, - .max_prim_counts = geom_max_prim_counts, - .build_ranges = geom_build_ranges, - .n_geoms = args.model->num_geometries, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, - .dynamic = args.model->dynamic, - .debug_name = args.model->debug_name, - }; - ray_model = getModelFromCache(args.model->num_geometries, 
max_prims, geoms); //, build_size.accelerationStructureSize); - if (!ray_model) { - gEngine.Con_Printf(S_ERROR "Ran out of model cache slots\n"); - } else { - qboolean result; - asrgs.p_accel = &ray_model->as; - - DEBUG_BEGINF(combuf->cmdbuf, "build blas for %s", args.model->debug_name); - result = createOrUpdateAccelerationStructure(combuf, &asrgs, ray_model); - DEBUG_END(combuf->cmdbuf); - - if (!result) - { - gEngine.Con_Printf(S_ERROR "Could not build BLAS for %s\n", args.model->debug_name); - returnModelToCache(ray_model); - ray_model = NULL; - } else { - ray_model->kusochki_offset = kusochki_count_offset; - ray_model->dynamic = args.model->dynamic; - ray_model->material_mode = -1; - Vector4Set(ray_model->color, 1, 1, 1, 1); - Matrix4x4_LoadIdentity(ray_model->prev_transform); - - if (vk_core.debug) - validateModel(ray_model); - } - } - } - - Mem_Free(geom_build_ranges); - Mem_Free(geom_max_prim_counts); - Mem_Free(geoms); // TODO this can be cached within models_cache ?? - - //gEngine.Con_Reportf("Model %s (%p) created blas %p\n", args.model->debug_name, args.model, args.model->rtx.blas); - - return ray_model; -} - -void VK_RayModelDestroy( struct vk_ray_model_s *model ) { - ASSERT(vk_core.rtx); - if (model->as != VK_NULL_HANDLE) { - //gEngine.Con_Reportf("Model %s destroying AS=%p blas_index=%d\n", model->debug_name, model->rtx.blas, blas_index); - - vkDestroyAccelerationStructureKHR(vk_core.device, model->as, NULL); - Mem_Free(model->geoms); - memset(model, 0, sizeof(*model)); - } -} - -// TODO move this to vk_brush -static void computeConveyorSpeed(const color24 rendercolor, int tex_index, vec2_t speed) { - float sy, cy; - float flConveyorSpeed = 0.0f; - float flRate, flAngle; - vk_texture_t *texture = findTexture( tex_index ); - //gl_texture_t *texture; - - // FIXME - /* if( ENGINE_GET_PARM( PARM_QUAKE_COMPATIBLE ) && RI.currententity == gEngfuncs.GetEntityByIndex( 0 ) ) */ - /* { */ - /* // same as doom speed */ - /* flConveyorSpeed = -35.0f; */ - /* } 
*/ - /* else */ - { - flConveyorSpeed = (rendercolor.g<<8|rendercolor.b) / 16.0f; - if( rendercolor.r ) flConveyorSpeed = -flConveyorSpeed; - } - //texture = R_GetTexture( glState.currentTextures[glState.activeTMU] ); - - flRate = fabs( flConveyorSpeed ) / (float)texture->width; - flAngle = ( flConveyorSpeed >= 0 ) ? 180 : 0; - - SinCos( flAngle * ( M_PI_F / 180.0f ), &sy, &cy ); - speed[0] = cy * flRate; - speed[1] = sy * flRate; -} - // TODO utilize uploadKusochki([1]) to avoid 2 copies of staging code -static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, uint32_t material_mode, const int *geom_indexes, int geom_indexes_count) { +#if 0 +static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, const int *geom_indexes, int geom_indexes_count) { // TODO can we sort all animated geometries (in brush) to have only a single range here? for (int i = 0; i < geom_indexes_count; ++i) { const int index = geom_indexes[i]; @@ -373,7 +89,7 @@ static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk vk_kusok_data_t *const kusochki = kusok_staging.ptr; vk_render_geometry_t *geom = render_model->geometries + index; - applyMaterialToKusok(kusochki + 0, geom); + applyMaterialToKusok(kusochki + 0, geom, -1, NULL); /* gEngine.Con_Reportf("model %s: geom=%d kuoffs=%d kustoff=%d kustsz=%d sthndl=%d\n", */ /* render_model->debug_name, */ @@ -387,115 +103,36 @@ static qboolean uploadKusochkiSubset(const vk_ray_model_t *const model, const vk } return true; } +#endif -static qboolean uploadKusochki(const vk_ray_model_t *const model, const vk_render_model_t *const render_model, uint32_t material_mode) { - const vk_staging_buffer_args_t staging_args = { - .buffer = g_ray_model_state.kusochki_buffer.buffer, - .offset = model->kusochki_offset * sizeof(vk_kusok_data_t), - .size = render_model->num_geometries * sizeof(vk_kusok_data_t), - .alignment = 
16, - }; - const vk_staging_region_t kusok_staging = R_VkStagingLockForBuffer(staging_args); - - if (!kusok_staging.ptr) { - gEngine.Con_Printf(S_ERROR "Couldn't allocate staging for %d kusochkov for model %s\n", model->num_geoms, render_model->debug_name); - return false; - } - - vk_kusok_data_t *const kusochki = kusok_staging.ptr; - - for (int i = 0; i < render_model->num_geometries; ++i) { - vk_render_geometry_t *geom = render_model->geometries + i; - applyMaterialToKusok(kusochki + i, geom); - } - - /* gEngine.Con_Reportf("model %s: geom=%d kuoffs=%d kustoff=%d kustsz=%d sthndl=%d\n", */ - /* render_model->debug_name, */ - /* render_model->num_geometries, */ - /* model->kusochki_offset, */ - /* staging_args.offset, staging_args.size, */ - /* kusok_staging.handle */ - /* ); */ - - R_VkStagingUnlock(kusok_staging.handle); - return true; -} - -void VK_RayFrameAddModel( vk_ray_model_t *model, const vk_render_model_t *render_model) { - vk_ray_draw_model_t* draw_model = g_ray_model_state.frame.models + g_ray_model_state.frame.num_models; - - ASSERT(vk_core.rtx); - ASSERT(g_ray_model_state.frame.num_models <= ARRAYSIZE(g_ray_model_state.frame.models)); - ASSERT(model->num_geoms == render_model->num_geometries); - - if (g_ray_model_state.frame.num_models == ARRAYSIZE(g_ray_model_state.frame.models)) { - gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n"); - return; - } - - ASSERT(model->as != VK_NULL_HANDLE); - - uint32_t material_mode = MATERIAL_MODE_OPAQUE; - switch (render_model->render_type) { +// TODO this material mapping is context dependent. I.e. different entity types might need different ray tracing behaviours for +// same render_mode/type and even texture. 
+static uint32_t materialModeFromRenderType(vk_render_type_e render_type) { + switch (render_type) { case kVkRenderTypeSolid: - material_mode = MATERIAL_MODE_OPAQUE; + return MATERIAL_MODE_OPAQUE; break; case kVkRenderType_A_1mA_RW: // blend: scr*a + dst*(1-a), depth: RW case kVkRenderType_A_1mA_R: // blend: scr*a + dst*(1-a), depth test // FIXME where is MATERIAL_MODE_TRANSLUCENT??1 - material_mode = MATERIAL_MODE_BLEND_MIX; + return MATERIAL_MODE_BLEND_MIX; break; case kVkRenderType_A_1: // blend: scr*a + dst, no depth test or write; sprite:kRenderGlow only - material_mode = MATERIAL_MODE_BLEND_GLOW; + return MATERIAL_MODE_BLEND_GLOW; break; case kVkRenderType_A_1_R: // blend: scr*a + dst, depth test case kVkRenderType_1_1_R: // blend: scr + dst, depth test - material_mode = MATERIAL_MODE_BLEND_ADD; + return MATERIAL_MODE_BLEND_ADD; break; case kVkRenderType_AT: // no blend, depth RW, alpha test - material_mode = MATERIAL_MODE_OPAQUE_ALPHA_TEST; + return MATERIAL_MODE_OPAQUE_ALPHA_TEST; break; default: - gEngine.Host_Error("Unexpected render type %d\n", render_model->render_type); + gEngine.Host_Error("Unexpected render type %d\n", render_type); } - // Re-upload kusochki if needed - // TODO all of this will not be required when model data is split out from Kusok struct -#define Vector4Compare(v1,v2) ((v1)[0]==(v2)[0] && (v1)[1]==(v2)[1] && (v1)[2]==(v2)[2] && (v1)[3]==(v2)[3]) - const qboolean upload_kusochki = (model->material_mode != material_mode - || !Vector4Compare(model->color, render_model->color) - || memcmp(model->prev_transform, render_model->prev_transform, sizeof(matrix4x4)) != 0); - -// TODO optimize: -// - collect list of geoms for which we could update anything (animated textues, uvs, etc) -// - update only those through staging -// - also consider tracking whether the main model color has changed (that'd need to update everything yay) - - if (upload_kusochki) { - model->material_mode = material_mode; - Vector4Copy(render_model->color, 
model->color); - Matrix4x4_Copy(model->prev_transform, render_model->prev_transform); - if (!uploadKusochki(model, render_model, material_mode)) { - return; - } - } else { - if (!uploadKusochkiSubset(model, render_model, material_mode, render_model->geometries_changed, render_model->geometries_changed_count)) - return; - } - - for (int i = 0; i < render_model->dynamic_polylights_count; ++i) { - rt_light_add_polygon_t *const polylight = render_model->dynamic_polylights + i; - polylight->transform_row = (const matrix3x4*)render_model->transform; - polylight->dynamic = true; - RT_LightAddPolygon(polylight); - } - - draw_model->model = model; - draw_model->material_mode = material_mode; - Matrix3x4_Copy(draw_model->transform_row, render_model->transform); - - g_ray_model_state.frame.num_models++; + return MATERIAL_MODE_OPAQUE; } void RT_RayModel_Clear(void) { @@ -503,27 +140,307 @@ void RT_RayModel_Clear(void) { } void XVK_RayModel_ClearForNextFrame( void ) { - // FIXME we depend on the fact that only a single frame can be in flight - // currently framectl waits for the queue to complete before returning - // so we can be sure here that previous frame is complete and we're free to - // destroy/reuse dynamic ASes from previous frame - for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) { - vk_ray_draw_model_t *model = g_ray_model_state.frame.models + i; - ASSERT(model->model); - - if (!model->model->dynamic) - continue; - - returnModelToCache(model->model); - model->model = NULL; - } - - g_ray_model_state.frame.num_models = 0; - - // TODO N frames in flight - // HACK: blas caching requires persistent memory - // proper fix would need some other memory allocation strategy - // VK_RingBuffer_ClearFrame(&g_rtx.accels_buffer_alloc); - //VK_RingBuffer_ClearFrame(&g_ray_model_state.kusochki_alloc); + g_ray_model_state.frame.instances_count = 0; R_DEBuffer_Flip(&g_ray_model_state.kusochki_alloc); } + +rt_kusochki_t RT_KusochkiAllocLong(int count) { + // TODO Proper 
block allocator, not just double-ended buffer + uint32_t kusochki_offset = R_DEBuffer_Alloc(&g_ray_model_state.kusochki_alloc, LifetimeStatic, count, 1); + + if (kusochki_offset == ALO_ALLOC_FAILED) { + gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded\n"); + return (rt_kusochki_t){0,0,-1}; + } + + return (rt_kusochki_t){ + .offset = kusochki_offset, + .count = count, + .internal_index__ = 0, // ??? + }; +} + +uint32_t RT_KusochkiAllocOnce(int count) { + // TODO Proper block allocator + uint32_t kusochki_offset = R_DEBuffer_Alloc(&g_ray_model_state.kusochki_alloc, LifetimeDynamic, count, 1); + + if (kusochki_offset == ALO_ALLOC_FAILED) { + gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded\n"); + return ALO_ALLOC_FAILED; + } + + return kusochki_offset; +} + +void RT_KusochkiFree(const rt_kusochki_t *kusochki) { + // TODO block alloc + PRINT_NOT_IMPLEMENTED(); +} + +// TODO this function can't really fail. It'd mean that staging is completely broken. +qboolean RT_KusochkiUpload(uint32_t kusochki_offset, const struct vk_render_geometry_s *geoms, int geoms_count, int override_texture_id, const vec4_t *override_colors) { + const vk_staging_buffer_args_t staging_args = { + .buffer = g_ray_model_state.kusochki_buffer.buffer, + .offset = kusochki_offset * sizeof(vk_kusok_data_t), + .size = geoms_count * sizeof(vk_kusok_data_t), + .alignment = 16, + }; + const vk_staging_region_t kusok_staging = R_VkStagingLockForBuffer(staging_args); + + if (!kusok_staging.ptr) { + gEngine.Con_Printf(S_ERROR "Couldn't allocate staging for %d kusochkov\n", geoms_count); + return false; + } + + vk_kusok_data_t *const p = kusok_staging.ptr; + for (int i = 0; i < geoms_count; ++i) { + const vk_render_geometry_t *geom = geoms + i; + applyMaterialToKusok(p + i, geom, override_texture_id, override_colors ? 
override_colors[i] : NULL); + } + + R_VkStagingUnlock(kusok_staging.handle); + return true; +} + +struct rt_model_s *RT_ModelCreate(rt_model_create_t args) { + const rt_kusochki_t kusochki = RT_KusochkiAllocLong(args.geometries_count); + if (kusochki.count == 0) { + gEngine.Con_Printf(S_ERROR "Cannot allocate kusochki for %s\n", args.debug_name); + return NULL; + } + + struct rt_blas_s* blas = RT_BlasCreate(args.debug_name, args.usage); + if (!blas) { + gEngine.Con_Printf(S_ERROR "Cannot create BLAS for %s\n", args.debug_name); + goto fail; + } + + if (!RT_BlasBuild(blas, args.geometries, args.geometries_count)) { + gEngine.Con_Printf(S_ERROR "Cannot build BLAS for %s\n", args.debug_name); + goto fail; + } + + RT_KusochkiUpload(kusochki.offset, args.geometries, args.geometries_count, -1, NULL); + + { + rt_model_t *const ret = Mem_Malloc(vk_core.pool, sizeof(*ret)); + ret->blas = blas; + ret->blas_addr = RT_BlasGetDeviceAddress(ret->blas); + ret->kusochki = kusochki; + return ret; + } + +fail: + if (blas) + RT_BlasDestroy(blas); + + if (kusochki.count) + RT_KusochkiFree(&kusochki); + + return NULL; +} + +void RT_ModelDestroy(struct rt_model_s* model) { + if (!model) + return; + + if (model->blas) + RT_BlasDestroy(model->blas); + + if (model->kusochki.count) + RT_KusochkiFree(&model->kusochki); + + Mem_Free(model); +} + +qboolean RT_ModelUpdate(struct rt_model_s *model, const struct vk_render_geometry_s *geometries, int geometries_count) { + return RT_BlasBuild(model->blas, geometries, geometries_count); +} + +qboolean RT_ModelUpdateMaterials(struct rt_model_s *model, const struct vk_render_geometry_s *geometries, int geometries_count, const int *geom_indices, int geom_indices_count) { + if (!geom_indices_count) + return true; + + int begin = 0; + for (int i = 1; i < geom_indices_count; ++i) { + const int geom_index = geom_indices[i]; + ASSERT(geom_index >= 0); + ASSERT(geom_index < geometries_count); + + if (geom_indices[i - 1] + 1 != geom_index) { + const int 
offset = geom_indices[begin]; + const int count = i - begin; + ASSERT(offset + count <= geometries_count); + if (!RT_KusochkiUpload(model->kusochki.offset + offset, geometries + offset, count, -1, NULL)) + return false; + + begin = i; + } + } + + { + const int offset = geom_indices[begin]; + const int count = geom_indices_count - begin; + ASSERT(offset + count <= geometries_count); + if (!RT_KusochkiUpload(model->kusochki.offset + offset, geometries + offset, count, -1, NULL)) + return false; + } + + return true; +} + +rt_draw_instance_t *getDrawInstance(void) { + if (g_ray_model_state.frame.instances_count >= ARRAYSIZE(g_ray_model_state.frame.instances)) { + gEngine.Con_Printf(S_ERROR "Too many RT draw instances, max = %d\n", (int)(ARRAYSIZE(g_ray_model_state.frame.instances))); + return NULL; + } + + return g_ray_model_state.frame.instances + (g_ray_model_state.frame.instances_count++); +} + +void RT_FrameAddModel( struct rt_model_s *model, rt_frame_add_model_t args ) { + if (!model || !model->blas) + return; + + uint32_t kusochki_offset = model->kusochki.offset; + + if (args.override.textures > 0) { + kusochki_offset = RT_KusochkiAllocOnce(args.override.geoms_count); + if (kusochki_offset == ALO_ALLOC_FAILED) + return; + + if (!RT_KusochkiUpload(kusochki_offset, args.override.geoms, args.override.geoms_count, args.override.textures, NULL)) { + gEngine.Con_Printf(S_ERROR "Couldn't upload kusochki for instanced model\n"); + return; + } + } + + for (int i = 0; i < args.dynamic_polylights_count; ++i) { + rt_light_add_polygon_t *const polylight = args.dynamic_polylights + i; + polylight->transform_row = (const matrix3x4*)args.transform; + polylight->dynamic = true; + RT_LightAddPolygon(polylight); + } + + rt_draw_instance_t *const draw_instance = getDrawInstance(); + if (!draw_instance) + return; + + draw_instance->blas_addr = model->blas_addr; + draw_instance->kusochki_offset = kusochki_offset; + draw_instance->material_mode = 
materialModeFromRenderType(args.render_type); + Vector4Copy(*args.color, draw_instance->color); + Matrix3x4_Copy(draw_instance->transform_row, args.transform); + Matrix4x4_Copy(draw_instance->prev_transform_row, args.prev_transform); +} + +#define MAX_RT_DYNAMIC_GEOMETRIES 256 + +typedef struct { + struct rt_blas_s *blas; + VkDeviceAddress blas_addr; + vk_render_geometry_t geometries[MAX_RT_DYNAMIC_GEOMETRIES]; + int geometries_count; + vec4_t colors[MAX_RT_DYNAMIC_GEOMETRIES]; +} rt_dynamic_t; + +static const char* group_names[MATERIAL_MODE_COUNT] = { + "MATERIAL_MODE_OPAQUE", + "MATERIAL_MODE_OPAQUE_ALPHA_TEST", + "MATERIAL_MODE_TRANSLUCENT", + "MATERIAL_MODE_BLEND_ADD", + "MATERIAL_MODE_BLEND_MIX", + "MATERIAL_MODE_BLEND_GLOW", +}; + +static struct { + rt_dynamic_t groups[MATERIAL_MODE_COUNT]; +} g_dyn; + +qboolean RT_DynamicModelInit(void) { + for (int i = 0; i < MATERIAL_MODE_COUNT; ++i) { + struct rt_blas_s *blas = RT_BlasCreate(group_names[i], kBlasBuildDynamicFast); + if (!blas) { + // FIXME destroy allocated + gEngine.Con_Printf(S_ERROR "Couldn't create blas for %s\n", group_names[i]); + return false; + } + + if (!RT_BlasPreallocate(blas, (rt_blas_preallocate_t){ + // TODO better estimates for these constants + .max_geometries = MAX_RT_DYNAMIC_GEOMETRIES, + .max_prims_per_geometry = 256, + .max_vertex_per_geometry = 256, + })) { + // FIXME destroy allocated + gEngine.Con_Printf(S_ERROR "Couldn't preallocate blas for %s\n", group_names[i]); + return false; + } + g_dyn.groups[i].blas = blas; + g_dyn.groups[i].blas_addr = RT_BlasGetDeviceAddress(blas); + } + + return true; +} + +void RT_DynamicModelShutdown(void) { + for (int i = 0; i < MATERIAL_MODE_COUNT; ++i) { + RT_BlasDestroy(g_dyn.groups[i].blas); + } +} + +void RT_DynamicModelProcessFrame(void) { + for (int i = 0; i < MATERIAL_MODE_COUNT; ++i) { + rt_dynamic_t *const dyn = g_dyn.groups + i; + if (!dyn->geometries_count) + continue; + + rt_draw_instance_t* draw_instance; + const uint32_t kusochki_offset 
= RT_KusochkiAllocOnce(dyn->geometries_count); + if (kusochki_offset == ALO_ALLOC_FAILED) { + gEngine.Con_Printf(S_ERROR "Couldn't allocate kusochki once for %d geoms of %s, skipping\n", dyn->geometries_count, group_names[i]); + goto tail; + } + + // FIXME override color + if (!RT_KusochkiUpload(kusochki_offset, dyn->geometries, dyn->geometries_count, -1, dyn->colors)) { + gEngine.Con_Printf(S_ERROR "Couldn't build blas for %d geoms of %s, skipping\n", dyn->geometries_count, group_names[i]); + goto tail; + } + + if (!RT_BlasBuild(dyn->blas, dyn->geometries, dyn->geometries_count)) { + gEngine.Con_Printf(S_ERROR "Couldn't build blas for %d geoms of %s, skipping\n", dyn->geometries_count, group_names[i]); + goto tail; + } + + draw_instance = getDrawInstance(); + if (!draw_instance) + goto tail; + + draw_instance->blas_addr = dyn->blas_addr; + draw_instance->kusochki_offset = kusochki_offset; + draw_instance->material_mode = i; + Vector4Set(draw_instance->color, 1, 1, 1, 1); + Matrix3x4_LoadIdentity(draw_instance->transform_row); + Matrix4x4_LoadIdentity(draw_instance->prev_transform_row); + +tail: + dyn->geometries_count = 0; + } +} + +void RT_FrameAddOnce( rt_frame_add_once_t args ) { + const int material_mode = materialModeFromRenderType(args.render_type); + rt_dynamic_t *const dyn = g_dyn.groups + material_mode; + + for (int i = 0; i < args.geometries_count; ++i) { + if (dyn->geometries_count == MAX_RT_DYNAMIC_GEOMETRIES) { + gEngine.Con_Printf(S_ERROR "Too many dynamic geometries for mode %s\n", group_names[material_mode]); + break; + } + + Vector4Copy(*args.color, dyn->colors[dyn->geometries_count]); + dyn->geometries[dyn->geometries_count++] = args.geometries[i]; + } +} + diff --git a/ref/vk/vk_render.c b/ref/vk/vk_render.c index c8705011..449c7fce 100644 --- a/ref/vk/vk_render.c +++ b/ref/vk/vk_render.c @@ -12,7 +12,6 @@ #include "vk_rtx.h" #include "vk_descriptor.h" #include "vk_framectl.h" // FIXME needed for dynamic models cmdbuf -#include 
"vk_previous_frame.h" #include "alolcator.h" #include "profiler.h" #include "r_speeds.h" @@ -25,6 +24,8 @@ #include +#define MODULE_NAME "render" + #define MAX_UNIFORM_SLOTS (MAX_SCENE_ENTITIES * 2 /* solid + trans */ + 1) #define PROFILER_SCOPES(X) \ @@ -336,8 +337,8 @@ qboolean VK_RenderInit( void ) { if (!createPipelines()) return false; - R_SpeedsRegisterMetric(&g_render.stats.dynamic_model_count, "models_dynamic", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_render.stats.models_count, "models", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_render.stats.dynamic_model_count, "models_dynamic", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_render.stats.models_count, "models", kSpeedsMetricCount); return true; } @@ -641,33 +642,48 @@ void VK_RenderEndRTX( struct vk_combuf_s* combuf, VkImageView img_dst_view, VkIm } } -qboolean VK_RenderModelInit( vk_render_model_t *model ) { - if (vk_core.rtx && (g_render_state.current_frame_is_ray_traced || !model->dynamic)) { - const VkBuffer geom_buffer = R_GeometryBuffer_Get(); - // TODO runtime rtx switch: ??? - const vk_ray_model_init_t args = { - .buffer = geom_buffer, - .model = model, - }; - model->ray_model = VK_RayModelCreate(args); - model->dynamic_polylights = NULL; - model->dynamic_polylights_count = 0; - Matrix4x4_LoadIdentity(model->transform); - return !!model->ray_model; - } +qboolean R_RenderModelCreate( vk_render_model_t *model, vk_render_model_init_t args ) { + memset(model, 0, sizeof(*model)); + Q_strncpy(model->debug_name, args.name, sizeof(model->debug_name)); - // TODO pre-bake optimal draws - return true; + model->geometries = args.geometries; + model->num_geometries = args.geometries_count; + + if (!vk_core.rtx) + return true; + + model->rt_model = RT_ModelCreate((rt_model_create_t){ + .debug_name = model->debug_name, + .geometries = args.geometries, + .geometries_count = args.geometries_count, + .usage = args.dynamic ? 
kBlasBuildDynamicUpdate : kBlasBuildStatic, + }); + return !!model->rt_model; } -void VK_RenderModelDestroy( vk_render_model_t* model ) { - // FIXME why the condition? we should do the cleanup anyway - if (vk_core.rtx && (g_render_state.current_frame_is_ray_traced || !model->dynamic)) { - if (model->ray_model) - VK_RayModelDestroy(model->ray_model); - if (model->dynamic_polylights) - Mem_Free(model->dynamic_polylights); - } +void R_RenderModelDestroy( vk_render_model_t* model ) { + if (model->dynamic_polylights) + Mem_Free(model->dynamic_polylights); + + if (model->rt_model) + RT_ModelDestroy(model->rt_model); +} + +qboolean R_RenderModelUpdate( const vk_render_model_t *model ) { + // Non-RT rendering doesn't need to update anything, assuming that geometry regions offsets are not changed, and losing intermediate states is fine + if (!g_render_state.current_frame_is_ray_traced) + return true; + + ASSERT(model->rt_model); + + return RT_ModelUpdate(model->rt_model, model->geometries, model->num_geometries); +} + +qboolean R_RenderModelUpdateMaterials( const vk_render_model_t *model, const int *geom_indices, int geom_indices_count) { + if (!model->rt_model) + return true; + + return RT_ModelUpdateMaterials(model->rt_model, model->geometries, model->num_geometries, geom_indices, geom_indices_count); } static void uboComputeAndSetMVPFromModel( const matrix4x4 model ) { @@ -676,47 +692,43 @@ static void uboComputeAndSetMVPFromModel( const matrix4x4 model ) { Matrix4x4_ToArrayFloatGL(mvp, (float*)g_render_state.dirty_uniform_data.mvp); } -void VK_RenderModelDraw( const cl_entity_t *ent, vk_render_model_t* model ) { - int current_texture = -1; +typedef struct { + const char *debug_name; + int lightmap; // TODO per-geometry + const vk_render_geometry_t *geometries; + int geometries_count; + const matrix4x4 *transform; + const vec4_t *color; + int render_type; + int textures_override; +} trad_submit_t; + +static void submitToTraditionalRender( trad_submit_t args ) { + int 
current_texture = args.textures_override; int element_count = 0; int index_offset = -1; int vertex_offset = 0; - uboComputeAndSetMVPFromModel( model->transform ); + uboComputeAndSetMVPFromModel( *args.transform ); // TODO get rid of this dirty ubo thing - Vector4Copy(model->color, g_render_state.dirty_uniform_data.color); - ASSERT(model->lightmap <= MAX_LIGHTMAPS); - const int lightmap = model->lightmap > 0 ? tglob.lightmapTextures[model->lightmap - 1] : tglob.whiteTexture; + Vector4Copy(*args.color, g_render_state.dirty_uniform_data.color); + ASSERT(args.lightmap <= MAX_LIGHTMAPS); + const int lightmap = args.lightmap > 0 ? tglob.lightmapTextures[args.lightmap - 1] : tglob.whiteTexture; - ++g_render.stats.models_count; + drawCmdPushDebugLabelBegin( args.debug_name ); - if (g_render_state.current_frame_is_ray_traced) { - if (ent != NULL && model != NULL) { - R_PrevFrame_SaveCurrentState( ent->index, model->transform ); - R_PrevFrame_ModelTransform( ent->index, model->prev_transform ); - } - else { - Matrix4x4_Copy( model->prev_transform, model->transform ); - } - - VK_RayFrameAddModel(model->ray_model, model); - - return; - } - - drawCmdPushDebugLabelBegin( model->debug_name ); - - for (int i = 0; i < model->num_geometries; ++i) { - const vk_render_geometry_t *geom = model->geometries + i; - const qboolean split = current_texture != geom->texture + for (int i = 0; i < args.geometries_count; ++i) { + const vk_render_geometry_t *geom = args.geometries + i; + const int tex = args.textures_override > 0 ? 
args.textures_override : geom->texture; + const qboolean split = current_texture != tex || vertex_offset != geom->vertex_offset || (index_offset + element_count) != geom->index_offset; // We only support indexed geometry ASSERT(geom->index_offset >= 0); - if (geom->texture < 0) + if (tex < 0) continue; if (split) { @@ -724,7 +736,7 @@ void VK_RenderModelDraw( const cl_entity_t *ent, vk_render_model_t* model ) { render_draw_t draw = { .lightmap = lightmap, .texture = current_texture, - .pipeline_index = model->render_type, + .pipeline_index = args.render_type, .element_count = element_count, .vertex_offset = vertex_offset, .index_offset = index_offset, @@ -733,7 +745,7 @@ void VK_RenderModelDraw( const cl_entity_t *ent, vk_render_model_t* model ) { drawCmdPushDraw( &draw ); } - current_texture = geom->texture; + current_texture = tex; index_offset = geom->index_offset; vertex_offset = geom->vertex_offset; element_count = 0; @@ -748,7 +760,7 @@ void VK_RenderModelDraw( const cl_entity_t *ent, vk_render_model_t* model ) { const render_draw_t draw = { .lightmap = lightmap, .texture = current_texture, - .pipeline_index = model->render_type, + .pipeline_index = args.render_type, .element_count = element_count, .vertex_offset = vertex_offset, .index_offset = index_offset, @@ -760,50 +772,83 @@ void VK_RenderModelDraw( const cl_entity_t *ent, vk_render_model_t* model ) { drawCmdPushDebugLabelEnd(); } -#define MAX_DYNAMIC_GEOMETRY 256 +void R_RenderModelDraw(const vk_render_model_t *model, r_model_draw_t args) { + ++g_render.stats.models_count; -static struct { - vk_render_model_t model; - matrix4x4 transform; - vk_render_geometry_t geometries[MAX_DYNAMIC_GEOMETRY]; -} g_dynamic_model = {0}; - -void VK_RenderModelDynamicBegin( vk_render_type_e render_type, const vec4_t color, const matrix3x4 transform, const char *debug_name_fmt, ... 
) { - va_list argptr; - va_start( argptr, debug_name_fmt ); - vsnprintf(g_dynamic_model.model.debug_name, sizeof(g_dynamic_model.model.debug_name), debug_name_fmt, argptr ); - va_end( argptr ); - - ASSERT(!g_dynamic_model.model.geometries); - g_dynamic_model.model.geometries = g_dynamic_model.geometries; - g_dynamic_model.model.num_geometries = 0; - g_dynamic_model.model.render_type = render_type; - g_dynamic_model.model.lightmap = 0; - Vector4Copy(color, g_dynamic_model.model.color); - Matrix4x4_LoadIdentity(g_dynamic_model.transform); - if (transform) - Matrix3x4_Copy(g_dynamic_model.transform, transform); + if (g_render_state.current_frame_is_ray_traced) { + ASSERT(model->rt_model); + RT_FrameAddModel(model->rt_model, (rt_frame_add_model_t){ + .render_type = args.render_type, + .transform = (const matrix3x4*)args.transform, + .prev_transform = (const matrix3x4*)args.prev_transform, + .color = args.color, + .dynamic_polylights = model->dynamic_polylights, + .dynamic_polylights_count = model->dynamic_polylights_count, + .override = { + .textures = args.textures_override, + .geoms = model->geometries, + .geoms_count = model->num_geometries, + }, + }); + } else { + submitToTraditionalRender((trad_submit_t){ + .debug_name = model->debug_name, + .lightmap = model->lightmap, + .geometries = model->geometries, + .geometries_count = model->num_geometries, + .transform = args.transform, + .color = args.color, + .render_type = args.render_type, + .textures_override = args.textures_override + }); + } } -void VK_RenderModelDynamicAddGeometry( const vk_render_geometry_t *geom ) { - ASSERT(g_dynamic_model.model.geometries); - if (g_dynamic_model.model.num_geometries == MAX_DYNAMIC_GEOMETRY) { - ERROR_THROTTLED(10, "Ran out of dynamic model geometry slots for model %s", g_dynamic_model.model.debug_name); + +void R_RenderDrawOnce(r_draw_once_t args) { + r_geometry_buffer_lock_t buffer; + if (!R_GeometryBufferAllocOnceAndLock( &buffer, args.vertices_count, args.indices_count)) { 
+ gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for dynamic draw\n"); return; } - g_dynamic_model.geometries[g_dynamic_model.model.num_geometries++] = *geom; -} -void VK_RenderModelDynamicCommit( void ) { - ASSERT(g_dynamic_model.model.geometries); + memcpy(buffer.vertices.ptr, args.vertices, sizeof(vk_vertex_t) * args.vertices_count); + memcpy(buffer.indices.ptr, args.indices, sizeof(uint16_t) * args.indices_count); - if (g_dynamic_model.model.num_geometries > 0) { - g_render.stats.dynamic_model_count++; - g_dynamic_model.model.dynamic = true; - VK_RenderModelInit( &g_dynamic_model.model ); - Matrix4x4_Copy(g_dynamic_model.model.transform, g_dynamic_model.transform); - VK_RenderModelDraw( NULL, &g_dynamic_model.model ); + R_GeometryBufferUnlock( &buffer ); + + const vk_render_geometry_t geometry = { + .texture = args.texture, + .material = kXVkMaterialRegular, + + .max_vertex = args.vertices_count, + .vertex_offset = buffer.vertices.unit_offset, + + .element_count = args.indices_count, + .index_offset = buffer.indices.unit_offset, + + .emissive = { (*args.color)[0], (*args.color)[1], (*args.color)[2] }, + }; + + if (g_render_state.current_frame_is_ray_traced) { + RT_FrameAddOnce((rt_frame_add_once_t){ + .debug_name = args.name, + .geometries = &geometry, + .color = args.color, + .geometries_count = 1, + .render_type = args.render_type, + }); + } else { + submitToTraditionalRender((trad_submit_t){ + .debug_name = args.name, + .lightmap = 0, + .geometries = &geometry, + .geometries_count = 1, + .transform = &m_matrix4x4_identity, + .color = args.color, + .render_type = args.render_type, + .textures_override = -1, + }); } - g_dynamic_model.model.debug_name[0] = '\0'; - g_dynamic_model.model.geometries = NULL; + g_render.stats.dynamic_model_count++; } diff --git a/ref/vk/vk_render.h b/ref/vk/vk_render.h index 6ee50cfb..127e1b25 100644 --- a/ref/vk/vk_render.h +++ b/ref/vk/vk_render.h @@ -97,46 +97,73 @@ typedef enum { } vk_render_type_e; struct 
rt_light_add_polygon_s; -struct vk_ray_model_s; +struct rt_model_s; typedef struct vk_render_model_s { #define MAX_MODEL_NAME_LENGTH 64 char debug_name[MAX_MODEL_NAME_LENGTH]; - vk_render_type_e render_type; - vec4_t color; + // TODO per-geometry? int lightmap; // <= 0 if no lightmap int num_geometries; vk_render_geometry_t *geometries; - int geometries_changed_count; - int *geometries_changed; + struct rt_model_s *rt_model; // This model will be one-frame only, its buffers are not preserved between frames + // TODO deprecate qboolean dynamic; - // Non-NULL only for ray tracing - struct vk_ray_model_s *ray_model; - // Polylights which need to be added per-frame dynamically // Used for non-worldmodel brush models which are not static + // TODO this doesn't belong here at all struct rt_light_add_polygon_s *dynamic_polylights; int dynamic_polylights_count; - - matrix4x4 transform; - - // previous frame ObjectToWorld (model) matrix - matrix4x4 prev_transform; } vk_render_model_t; -qboolean VK_RenderModelInit( vk_render_model_t* model ); -void VK_RenderModelDestroy( vk_render_model_t* model ); -void VK_RenderModelDraw( const cl_entity_t *ent, vk_render_model_t* model ); +// Initialize model from scratch +typedef struct { + const char *name; + vk_render_geometry_t *geometries; + int geometries_count; -void VK_RenderModelDynamicBegin( vk_render_type_e render_type, const vec4_t color, const matrix3x4 transform, const char *debug_name_fmt, ... 
); -void VK_RenderModelDynamicAddGeometry( const vk_render_geometry_t *geom ); -void VK_RenderModelDynamicCommit( void ); + // Geometry data can and will be updated + // Upading geometry locations is not supported though, only vertex/index values + qboolean dynamic; +} vk_render_model_init_t; +qboolean R_RenderModelCreate( vk_render_model_t *model, vk_render_model_init_t args ); +void R_RenderModelDestroy( vk_render_model_t* model ); + +qboolean R_RenderModelUpdate( const vk_render_model_t *model ); +qboolean R_RenderModelUpdateMaterials( const vk_render_model_t *model, const int *geom_indices, int geom_indices_count); + +typedef struct { + vk_render_type_e render_type; + + // These are "consumed": copied into internal storage and can be pointers to stack vars + const vec4_t *color; + const matrix4x4 *transform, *prev_transform; + + // Global texture override if > 0 + // Used by sprite+quad instancing + int textures_override; +} r_model_draw_t; + +void R_RenderModelDraw(const vk_render_model_t *model, r_model_draw_t args); + +typedef struct { + const char *name; + const struct vk_vertex_s *vertices; + const uint16_t *indices; + int vertices_count, indices_count; + + int render_type; + int texture; + const vec4_t *emissive; + const vec4_t *color; +} r_draw_once_t; +void R_RenderDrawOnce(r_draw_once_t args); void VK_RenderDebugLabelBegin( const char *label ); void VK_RenderDebugLabelEnd( void ); diff --git a/ref/vk/vk_rmain.c b/ref/vk/vk_rmain.c index 776ef785..6beb48fa 100644 --- a/ref/vk/vk_rmain.c +++ b/ref/vk/vk_rmain.c @@ -112,16 +112,12 @@ static void R_InitSkyClouds( struct mip_s *mt, struct texture_s *tx, qboolean cu extern void GL_SubdivideSurface( msurface_t *fa ); - -static void Mod_LoadAliasModel( model_t *mod, const void *buffer, qboolean *loaded ) -{ - PRINT_NOT_IMPLEMENTED_ARGS("(%p, %s), %p, %d", mod, mod->name, buffer, *loaded); -} - static qboolean Mod_ProcessRenderData( model_t *mod, qboolean create, const byte *buffer ) { qboolean loaded = true; + 
//gEngine.Con_Reportf("%s(%s, create=%d)\n", __FUNCTION__, mod->name, create); + // TODO does this ever happen? if (!create && mod->type == mod_brush) gEngine.Con_Printf( S_WARN "VK FIXME Trying to unload brush model %s\n", mod->name); @@ -131,17 +127,19 @@ static qboolean Mod_ProcessRenderData( model_t *mod, qboolean create, const byte switch( mod->type ) { case mod_studio: - Mod_LoadStudioModel( mod, buffer, &loaded ); + // This call happens before we get R_NewMap, which frees all current buffers + // So we can't really load anything here + // TODO we might benefit a tiny bit (a few ms loading time) from reusing studio models from previous map break; case mod_sprite: Mod_LoadSpriteModel( mod, buffer, &loaded, mod->numtexinfo ); break; case mod_alias: - Mod_LoadAliasModel( mod, buffer, &loaded ); + // TODO what ARE mod_alias? We just don't know. break; case mod_brush: - // FIXME this happens before we get R_NewMap, which frees all current buffers - // loaded = VK_LoadBrushModel( mod, buffer ); + // This call happens before we get R_NewMap, which frees all current buffers + // So we can't really load anything here break; default: gEngine.Host_Error( "Mod_LoadModel: unsupported type %d\n", mod->type ); } @@ -151,10 +149,14 @@ static qboolean Mod_ProcessRenderData( model_t *mod, qboolean create, const byte gEngine.drawFuncs->Mod_ProcessUserData( mod, create, buffer ); if( !create ) { - switch( mod->type ) - { + switch( mod->type ) { case mod_brush: - VK_BrushModelDestroy( mod ); + // Empirically, this function only attempts to destroy the worldmodel before loading the next map. + // However, all brush models need to be destroyed. Use this as a signal to destroy them too. + // Assert that this observation is correct. + // ASSERT(mod == gEngine.pfnGetModelByIndex(1)); not correct when closing the game. At this point model count is zero. 
+ + R_SceneMapDestroy(); break; default: PRINT_NOT_IMPLEMENTED_ARGS("destroy (%p, %d, %s)", mod, mod->type, mod->name); diff --git a/ref/vk/vk_rtx.c b/ref/vk/vk_rtx.c index fafdd40a..adaad9e2 100644 --- a/ref/vk/vk_rtx.c +++ b/ref/vk/vk_rtx.c @@ -15,8 +15,8 @@ #include "vk_ray_internal.h" #include "vk_staging.h" #include "vk_textures.h" -#include "vk_previous_frame.h" #include "vk_combuf.h" +#include "vk_logs.h" #include "alolcator.h" @@ -26,6 +26,8 @@ #include +#define LOG_MODULE LogModule_RT + #define MAX_FRAMES_IN_FLIGHT 2 // TODO settings/realtime modifiable/adaptive @@ -135,7 +137,6 @@ void VK_RayFrameBegin( void ) { RT_VkAccelFrameBegin(); XVK_RayModel_ClearForNextFrame(); - R_PrevFrame_StartFrame(); RT_LightsFrameBegin(); } @@ -170,18 +171,6 @@ typedef struct { static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* args) { const VkCommandBuffer cmdbuf = combuf->cmdbuf; - // TODO move this to "TLAS producer" - g_rtx.res[ExternalResource_tlas].resource = (vk_resource_t){ - .type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, - .value = (vk_descriptor_value_t){ - .accel = (VkWriteDescriptorSetAccelerationStructureKHR) { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, - .accelerationStructureCount = 1, - .pAccelerationStructures = &g_accel.tlas, - .pNext = NULL, - }, - }, - }; #define RES_SET_BUFFER(name, type_, source_, offset_, size_) \ g_rtx.res[ExternalResource_##name].resource = (vk_resource_t){ \ @@ -278,24 +267,14 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a } DEBUG_BEGIN(cmdbuf, "yay tracing"); - RT_VkAccelPrepareTlas(combuf); - prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y); - // 4. 
Barrier for TLAS build - { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .buffer = g_accel.accels_buffer.buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - } }; - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); - } + // Feed tlas with dynamic data + RT_DynamicModelProcessFrame(); + + // TODO move this to "TLAS producer" + g_rtx.res[ExternalResource_tlas].resource = RT_VkAccelPrepareTlas(combuf); + + prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y); { // FIXME this should be done automatically inside meatpipe, TODO //const uint32_t size = sizeof(struct Lights); @@ -411,7 +390,7 @@ static void reloadMainpipe(void) { for (int i = 0; i < newpipe->resources_count; ++i) { const vk_meatpipe_resource_t *mr = newpipe->resources + i; - gEngine.Con_Reportf("res %d/%d: %s descriptor=%u count=%d flags=[%c%c] image_format=%u\n", + DEBUG("res %d/%d: %s descriptor=%u count=%d flags=[%c%c] image_format=%u", i, newpipe->resources_count, mr->name, mr->descriptor_type, mr->count, (mr->flags & MEATPIPE_RES_WRITE) ? 'W' : ' ', (mr->flags & MEATPIPE_RES_CREATE) ? 'C' : ' ', @@ -420,7 +399,7 @@ static void reloadMainpipe(void) { const qboolean create = !!(mr->flags & MEATPIPE_RES_CREATE); if (create && mr->descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - gEngine.Con_Printf(S_ERROR "Only storage image creation is supported for meatpipes\n"); + ERR("Only storage image creation is supported for meatpipes"); goto fail; } @@ -429,7 +408,7 @@ static void reloadMainpipe(void) { const int index = create ? 
getResourceSlotForName(mr->name) : findResource(mr->name); if (index < 0) { - gEngine.Con_Printf(S_ERROR "Couldn't find resource/slot for %s\n", mr->name); + ERR("Couldn't find resource/slot for %s", mr->name); goto fail; } @@ -476,7 +455,7 @@ static void reloadMainpipe(void) { } if (!newpipe_out) { - gEngine.Con_Printf(S_ERROR "New rt.json doesn't define an 'dest' output texture\n"); + ERR("New rt.json doesn't define an 'dest' output texture"); goto fail; } @@ -495,7 +474,7 @@ static void reloadMainpipe(void) { const int dest_index = findResource(pr->name); if (dest_index < 0) { - gEngine.Con_Printf(S_ERROR "Couldn't find prev_ resource/slot %s for resource %s\n", pr->name, mr->name); + ERR("Couldn't find prev_ resource/slot %s for resource %s", pr->name, mr->name); goto fail; } @@ -553,7 +532,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) // XVK_RayModel_Validate(); if (g_rtx.reload_pipeline) { - gEngine.Con_Printf(S_WARN "Reloading RTX shaders/pipelines\n"); + WARN("Reloading RTX shaders/pipelines"); XVK_CHECK(vkDeviceWaitIdle(vk_core.device)); reloadMainpipe(); @@ -563,7 +542,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) ASSERT(g_rtx.mainpipe_out); - if (g_ray_model_state.frame.num_models == 0) { + if (g_ray_model_state.frame.instances_count == 0) { const r_vkimage_blit_args blit_args = { .in_stage = VK_PIPELINE_STAGE_TRANSFER_BIT, .src = { @@ -607,6 +586,10 @@ qboolean VK_RayInit( void ) if (!RT_VkAccelInit()) return false; + // FIXME shutdown accel + if (!RT_DynamicModelInit()) + return false; + #define REGISTER_EXTERNAL(type, name_) \ Q_strncpy(g_rtx.res[ExternalResource_##name_].name, #name_, sizeof(g_rtx.res[0].name)); \ g_rtx.res[ExternalResource_##name_].refcount = 1; @@ -641,7 +624,7 @@ qboolean VK_RayInit( void ) return false; } - if (!VK_BufferCreate("model headers", &g_ray_model_state.model_headers_buffer, sizeof(struct ModelHeader) * MAX_ACCELS, + if (!VK_BufferCreate("model headers", 
&g_ray_model_state.model_headers_buffer, sizeof(struct ModelHeader) * MAX_INSTANCES, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { // FIXME complain, handle @@ -665,4 +648,5 @@ void VK_RayShutdown( void ) { VK_BufferDestroy(&g_rtx.uniform_buffer); RT_VkAccelShutdown(); + RT_DynamicModelShutdown(); } diff --git a/ref/vk/vk_rtx.h b/ref/vk/vk_rtx.h index f68d0cd9..35798ec4 100644 --- a/ref/vk/vk_rtx.h +++ b/ref/vk/vk_rtx.h @@ -1,30 +1,10 @@ #pragma once +#include "vk_geometry.h" #include "vk_core.h" -struct vk_render_model_s; -struct vk_ray_model_s; -struct model_s; - -typedef struct { - struct vk_render_model_s *model; - VkBuffer buffer; // TODO must be uniform for all models. Shall we read it directly from vk_render? -} vk_ray_model_init_t; - -struct vk_ray_model_s *VK_RayModelCreate( vk_ray_model_init_t model_init ); -void VK_RayModelDestroy( struct vk_ray_model_s *model ); - void VK_RayFrameBegin( void ); -// TODO how to improve this render vs ray model storage/interaction? 
-void VK_RayFrameAddModel(struct vk_ray_model_s *model, const struct vk_render_model_s *render_model); - -typedef struct { - VkBuffer buffer; - uint32_t offset; - uint64_t size; -} vk_buffer_region_t; - typedef struct { struct vk_combuf_s *combuf; @@ -37,6 +17,7 @@ typedef struct { const matrix4x4 *projection, *view; // Buffer holding vertex and index data + // TODO remove struct { VkBuffer buffer; // must be the same as in vk_ray_model_create_t TODO: validate or make impossible to specify incorrectly uint64_t size; @@ -47,8 +28,58 @@ typedef struct { void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args); void VK_RayNewMap( void ); -void VK_RayMapLoadEnd( void ); qboolean VK_RayInit( void ); void VK_RayShutdown( void ); +struct vk_render_geometry_s; +struct rt_model_s; + +typedef enum { + kBlasBuildStatic, // builds slow for fast trace + kBlasBuildDynamicUpdate, // builds if not built, updates if built + kBlasBuildDynamicFast, // builds fast from scratch (no correlation with previous frame guaranteed, e.g. 
triapi) +} rt_blas_usage_e; + +typedef struct { + const char *debug_name; // Must remain alive until RT_ModelDestroy + const struct vk_render_geometry_s *geometries; + int geometries_count; + rt_blas_usage_e usage; +} rt_model_create_t; +struct rt_model_s *RT_ModelCreate(rt_model_create_t args); +void RT_ModelDestroy(struct rt_model_s *model); + +qboolean RT_ModelUpdate(struct rt_model_s *model, const struct vk_render_geometry_s *geometries, int geometries_count); + +qboolean RT_ModelUpdateMaterials(struct rt_model_s *model, const struct vk_render_geometry_s *geometries, int geometries_count, const int *geom_indices, int geom_indices_count); + +typedef struct { + int render_type; // TODO material_mode + const matrix3x4 *transform, *prev_transform; + const vec4_t *color; + + struct rt_light_add_polygon_s *dynamic_polylights; + int dynamic_polylights_count; + + struct { + int textures; // Override kusochki/material textures if > 0 + + // These are needed in order to recreate kusochki geometry data + // TODO remove when material data is split from kusochki + int geoms_count; + const struct vk_render_geometry_s *geoms; + } override; +} rt_frame_add_model_t; + +void RT_FrameAddModel( struct rt_model_s *model, rt_frame_add_model_t args ); + +typedef struct { + const char *debug_name; + const struct vk_render_geometry_s *geometries; + const vec4_t *color; + int geometries_count; + int render_type; +} rt_frame_add_once_t; + +void RT_FrameAddOnce( rt_frame_add_once_t args ); diff --git a/ref/vk/vk_scene.c b/ref/vk/vk_scene.c index 2fd742f9..29ea2203 100644 --- a/ref/vk/vk_scene.c +++ b/ref/vk/vk_scene.c @@ -19,6 +19,8 @@ #include "camera.h" #include "vk_mapents.h" #include "profiler.h" +#include "vk_entity_data.h" +#include "vk_logs.h" #include "com_strings.h" #include "ref_params.h" @@ -28,6 +30,8 @@ #include // qsort #include +#define LOG_MODULE LogModule_Misc + #define PROFILER_SCOPES(X) \ X(scene_render, "VK_SceneRender"); \ X(draw_viewmodel, "draw viewmodel"); \ @@ 
-85,9 +89,8 @@ static void loadLights( const model_t *const map ) { // Clears all old map data static void mapLoadBegin( const model_t *const map ) { - // TODO should we do something like VK_BrushBeginLoad? - VK_BrushStatsClear(); - + VK_EntityDataClear(); + R_StudioCacheClear(); R_GeometryBuffer_MapClear(); VK_ClearLightmap(); @@ -106,47 +109,42 @@ static void mapLoadEnd(const model_t *const map) { VK_UploadLightmap(); } -static void loadBrushModels( void ) { +static void preloadModels( void ) { const int num_models = gEngine.EngineGetParm( PARM_NUMMODELS, 0 ); // Load all models at once - gEngine.Con_Reportf( "Num models: %d:\n", num_models ); + DEBUG( "Num models: %d:", num_models ); for( int i = 0; i < num_models; i++ ) { model_t *m; if(( m = gEngine.pfnGetModelByIndex( i + 1 )) == NULL ) continue; - gEngine.Con_Reportf( " %d: name=%s, type=%d, submodels=%d, nodes=%d, surfaces=%d, nummodelsurfaces=%d\n", i, m->name, m->type, m->numsubmodels, m->numnodes, m->numsurfaces, m->nummodelsurfaces); + DEBUG( " %d: name=%s, type=%d, submodels=%d, nodes=%d, surfaces=%d, nummodelsurfaces=%d", i, m->name, m->type, m->numsubmodels, m->numnodes, m->numsurfaces, m->nummodelsurfaces); - if( m->type != mod_brush ) - continue; + switch (m->type) { + case mod_brush: + if (!VK_BrushModelLoad(m)) + gEngine.Host_Error( "Couldn't load brush model %s\n", m->name ); + break; - if (!VK_BrushModelLoad(m)) - gEngine.Host_Error( "Couldn't load model %s\n", m->name ); - } -} + case mod_studio: + if (!R_StudioModelPreload(m)) + gEngine.Host_Error( "Couldn't preload studio model %s\n", m->name ); + break; -// Only used when reloading patches. 
In norma circumstances models get destroyed by the engine -static void destroyBrushModels( void ) { - const int num_models = gEngine.EngineGetParm( PARM_NUMMODELS, 0 ); - gEngine.Con_Printf("Destroying %d models\n", num_models); - - for( int i = 0; i < num_models; i++ ) { - model_t *m; - if(( m = gEngine.pfnGetModelByIndex( i + 1 )) == NULL ) - continue; - - if( m->type != mod_brush ) - continue; - - VK_BrushModelDestroy(m); + default: + break; + } } } static void loadMap(const model_t* const map) { + VK_LogsReadCvar(); mapLoadBegin(map); + R_SpriteNewMapFIXME(); + // Load light entities and patch data prior to loading map brush model XVK_ParseMapEntities(); @@ -157,20 +155,20 @@ static void loadMap(const model_t* const map) { // Depends on loaded materials. Must preceed loading brush models. XVK_ParseMapPatches(); - loadBrushModels(); + preloadModels(); loadLights(map); mapLoadEnd(map); } static void reloadPatches( void ) { - gEngine.Con_Printf("Reloading patches and materials\n"); + INFO("Reloading patches and materials"); R_VkStagingFlushSync(); XVK_CHECK(vkDeviceWaitIdle( vk_core.device )); - destroyBrushModels(); + VK_BrushModelDestroyAll(); const model_t *const map = gEngine.pfnGetModelByIndex( 1 ); loadMap(map); @@ -227,6 +225,10 @@ int R_FIXME_GetEntityRenderMode( cl_entity_t *ent ) return ent->curstate.rendermode; } +void R_SceneMapDestroy( void ) { + VK_BrushModelDestroyAll(); +} + // tell the renderer what new map is started void R_NewMap( void ) { const model_t *const map = gEngine.pfnGetModelByIndex( 1 ); @@ -235,7 +237,7 @@ void R_NewMap( void ) { // and this R_NewMap call is from within loading of a saved game. const qboolean is_save_load = !!gEngine.pfnGetModelByIndex( 1 )->cache.data; - gEngine.Con_Reportf( "R_NewMap, loading save: %d\n", is_save_load ); + INFO( "R_NewMap, loading save: %d", is_save_load ); // Skip clearing already loaded data if the map hasn't changed. 
if (is_save_load) @@ -247,6 +249,8 @@ void R_NewMap( void ) { XVK_SetupSky( gEngine.pfnGetMoveVars()->skyName ); loadMap(map); + + R_StudioResetPlayerModels(); } qboolean R_AddEntity( struct cl_entity_s *clent, int type ) @@ -586,7 +590,7 @@ static void drawEntity( cl_entity_t *ent, int render_mode ) for (int i = 0; i < g_map_entities.func_walls_count; ++i) { xvk_mapent_func_wall_t *const fw = g_map_entities.func_walls + i; if (Q_strcmp(ent->model->name, fw->model) == 0) { - /* gEngine.Con_Reportf("ent->index=%d (%s) mapent:%d off=%f %f %f\n", */ + /* DEBUG("ent->index=%d (%s) mapent:%d off=%f %f %f", */ /* ent->index, ent->model->name, fw->entity_index, */ /* fw->origin[0], fw->origin[1], fw->origin[2]); */ Matrix3x4_LoadIdentity(model); @@ -717,7 +721,7 @@ void CL_AddCustomBeam( cl_entity_t *pEnvBeam ) { if( g_lists.draw_list->num_beam_entities >= ARRAYSIZE(g_lists.draw_list->beam_entities) ) { - gEngine.Con_Printf( S_ERROR "Too many beams %d!\n", g_lists.draw_list->num_beam_entities ); + ERR("Too many beams %d!", g_lists.draw_list->num_beam_entities ); return; } diff --git a/ref/vk/vk_scene.h b/ref/vk/vk_scene.h index faf0dbe5..b7dda842 100644 --- a/ref/vk/vk_scene.h +++ b/ref/vk/vk_scene.h @@ -20,6 +20,7 @@ void R_ClearScene( void ); void R_PushScene( void ); void R_PopScene( void ); +void R_SceneMapDestroy( void ); void R_NewMap( void ); void R_RenderScene( void ); diff --git a/ref/vk/vk_sprite.c b/ref/vk/vk_sprite.c index c163e915..d7aff75d 100644 --- a/ref/vk/vk_sprite.c +++ b/ref/vk/vk_sprite.c @@ -5,6 +5,7 @@ #include "vk_geometry.h" #include "vk_scene.h" #include "r_speeds.h" +#include "vk_math.h" #include "sprite.h" #include "xash3d_mathlib.h" @@ -14,6 +15,8 @@ #include +#define MODULE_NAME "sprite" + // it's a Valve default value for LoadMapSprite (probably must be power of two) #define MAPSPRITE_SIZE 128 #define GLARE_FALLOFF 19000.0f @@ -22,11 +25,122 @@ static struct { struct { int sprites; } stats; + + struct { + r_geometry_range_t geom; + 
vk_render_geometry_t geometry; + vk_render_model_t model; + } quad; } g_sprite; +static qboolean createQuadModel(void) { + g_sprite.quad.geom = R_GeometryRangeAlloc(4, 6); + if (g_sprite.quad.geom.block_handle.size == 0) { + gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for sprite quad\n"); + return false; + } + + const r_geometry_range_lock_t lock = R_GeometryRangeLock(&g_sprite.quad.geom); + + vec3_t point; + vk_vertex_t *dst_vtx; + uint16_t *dst_idx; + + dst_vtx = lock.vertices; + dst_idx = lock.indices; + + const vec3_t org = {0, 0, 0}; + const vec3_t v_right = {1, 0, 0}; + const vec3_t v_up = {0, 1, 0}; + vec3_t v_normal; + CrossProduct(v_right, v_up, v_normal); + + VectorMA( org, -1.f, v_up, point ); + VectorMA( point, -1.f, v_right, dst_vtx[0].pos ); + dst_vtx[0].gl_tc[0] = 0.f; + dst_vtx[0].gl_tc[1] = 1.f; + dst_vtx[0].lm_tc[0] = dst_vtx[0].lm_tc[1] = 0.f; + Vector4Set(dst_vtx[0].color, 255, 255, 255, 255); + VectorCopy(v_normal, dst_vtx[0].normal); + + VectorMA( org, 1.f, v_up, point ); + VectorMA( point, -1.f, v_right, dst_vtx[1].pos ); + dst_vtx[1].gl_tc[0] = 0.f; + dst_vtx[1].gl_tc[1] = 0.f; + dst_vtx[1].lm_tc[0] = dst_vtx[1].lm_tc[1] = 0.f; + Vector4Set(dst_vtx[1].color, 255, 255, 255, 255); + VectorCopy(v_normal, dst_vtx[1].normal); + + VectorMA( org, 1.f, v_up, point ); + VectorMA( point, 1.f, v_right, dst_vtx[2].pos ); + dst_vtx[2].gl_tc[0] = 1.f; + dst_vtx[2].gl_tc[1] = 0.f; + dst_vtx[2].lm_tc[0] = dst_vtx[2].lm_tc[1] = 0.f; + Vector4Set(dst_vtx[2].color, 255, 255, 255, 255); + VectorCopy(v_normal, dst_vtx[2].normal); + + VectorMA( org, -1.f, v_up, point ); + VectorMA( point, 1.f, v_right, dst_vtx[3].pos ); + dst_vtx[3].gl_tc[0] = 1.f; + dst_vtx[3].gl_tc[1] = 1.f; + dst_vtx[3].lm_tc[0] = dst_vtx[3].lm_tc[1] = 0.f; + Vector4Set(dst_vtx[3].color, 255, 255, 255, 255); + VectorCopy(v_normal, dst_vtx[3].normal); + + dst_idx[0] = 0; + dst_idx[1] = 1; + dst_idx[2] = 2; + dst_idx[3] = 0; + dst_idx[4] = 2; + dst_idx[5] = 3; + + R_GeometryRangeUnlock( 
&lock ); + + g_sprite.quad.geometry = (vk_render_geometry_t){ + .max_vertex = 4, + .vertex_offset = g_sprite.quad.geom.vertices.unit_offset, + + .element_count = 6, + .index_offset = g_sprite.quad.geom.indices.unit_offset, + + .material = kXVkMaterialRegular, + .texture = tglob.defaultTexture, + .emissive = {1,1,1}, + }; + + return R_RenderModelCreate(&g_sprite.quad.model, (vk_render_model_init_t){ + .name = "sprite", + .geometries = &g_sprite.quad.geometry, + .geometries_count = 1, + .dynamic = false, + }); +} + +static void destroyQuadModel(void) { + if (g_sprite.quad.model.num_geometries) + R_RenderModelDestroy(&g_sprite.quad.model); + + if (g_sprite.quad.geom.block_handle.size) + R_GeometryRangeFree(&g_sprite.quad.geom); + + g_sprite.quad.model.num_geometries = 0; + g_sprite.quad.geom.block_handle.size = 0; +} + qboolean R_SpriteInit(void) { - R_SpeedsRegisterMetric(&g_sprite.stats.sprites, "sprites_count", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_sprite.stats.sprites, "count", kSpeedsMetricCount); + return true; + // TODO return createQuadModel(); +} + +void R_SpriteShutdown(void) { + destroyQuadModel(); +} + +void R_SpriteNewMapFIXME(void) { + destroyQuadModel(); + ASSERT(createQuadModel()); } static mspriteframe_t *R_GetSpriteFrame( const model_t *pModel, int frame, float yaw ) @@ -672,81 +786,25 @@ static vk_render_type_e spriteRenderModeToRenderType( int render_mode ) { } static void R_DrawSpriteQuad( const char *debug_name, mspriteframe_t *frame, vec3_t org, vec3_t v_right, vec3_t v_up, float scale, int texture, int render_mode, const vec4_t color ) { - r_geometry_buffer_lock_t buffer; - if (!R_GeometryBufferAllocAndLock( &buffer, 4, 6, LifetimeSingleFrame )) { - gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for sprite quad\n"); - return; - } - - vec3_t point; - vk_vertex_t *dst_vtx; - uint16_t *dst_idx; - - dst_vtx = buffer.vertices.ptr; - dst_idx = buffer.indices.ptr; - vec3_t v_normal; CrossProduct(v_right, v_up, v_normal); - VectorMA( org, 
frame->down * scale, v_up, point ); - VectorMA( point, frame->left * scale, v_right, dst_vtx[0].pos ); - dst_vtx[0].gl_tc[0] = 0.f; - dst_vtx[0].gl_tc[1] = 1.f; - dst_vtx[0].lm_tc[0] = dst_vtx[0].lm_tc[1] = 0.f; - Vector4Set(dst_vtx[0].color, 255, 255, 255, 255); - VectorCopy(v_normal, dst_vtx[0].normal); + // TODO can frame->right/left and frame->up/down be asymmetric? + VectorScale(v_right, frame->right * scale, v_right); + VectorScale(v_up, frame->up * scale, v_up); - VectorMA( org, frame->up * scale, v_up, point ); - VectorMA( point, frame->left * scale, v_right, dst_vtx[1].pos ); - dst_vtx[1].gl_tc[0] = 0.f; - dst_vtx[1].gl_tc[1] = 0.f; - dst_vtx[1].lm_tc[0] = dst_vtx[1].lm_tc[1] = 0.f; - Vector4Set(dst_vtx[1].color, 255, 255, 255, 255); - VectorCopy(v_normal, dst_vtx[1].normal); + matrix4x4 transform; + Matrix4x4_CreateFromVectors(transform, v_right, v_up, v_normal, org); - VectorMA( org, frame->up * scale, v_up, point ); - VectorMA( point, frame->right * scale, v_right, dst_vtx[2].pos ); - dst_vtx[2].gl_tc[0] = 1.f; - dst_vtx[2].gl_tc[1] = 0.f; - dst_vtx[2].lm_tc[0] = dst_vtx[2].lm_tc[1] = 0.f; - Vector4Set(dst_vtx[2].color, 255, 255, 255, 255); - VectorCopy(v_normal, dst_vtx[2].normal); + const vk_render_type_e render_type = spriteRenderModeToRenderType(render_mode); - VectorMA( org, frame->down * scale, v_up, point ); - VectorMA( point, frame->right * scale, v_right, dst_vtx[3].pos ); - dst_vtx[3].gl_tc[0] = 1.f; - dst_vtx[3].gl_tc[1] = 1.f; - dst_vtx[3].lm_tc[0] = dst_vtx[3].lm_tc[1] = 0.f; - Vector4Set(dst_vtx[3].color, 255, 255, 255, 255); - VectorCopy(v_normal, dst_vtx[3].normal); - - dst_idx[0] = 0; - dst_idx[1] = 1; - dst_idx[2] = 2; - dst_idx[3] = 0; - dst_idx[4] = 2; - dst_idx[5] = 3; - - R_GeometryBufferUnlock( &buffer ); - - { - const vk_render_geometry_t geometry = { - .texture = texture, - .material = kXVkMaterialRegular, - - .max_vertex = 4, - .vertex_offset = buffer.vertices.unit_offset, - - .element_count = 6, - .index_offset = 
buffer.indices.unit_offset, - - .emissive = {1,1,1}, - }; - - VK_RenderModelDynamicBegin( spriteRenderModeToRenderType(render_mode), color, m_matrix4x4_identity, "%s", debug_name ); - VK_RenderModelDynamicAddGeometry( &geometry ); - VK_RenderModelDynamicCommit(); - } + R_RenderModelDraw(&g_sprite.quad.model, (r_model_draw_t){ + .render_type = render_type, + .color = (const vec4_t*)color, + .transform = &transform, + .prev_transform = &transform, + .textures_override = texture, + }); } static qboolean R_SpriteHasLightmap( cl_entity_t *e, int texFormat ) diff --git a/ref/vk/vk_sprite.h b/ref/vk/vk_sprite.h index 5b6f89f7..10664536 100644 --- a/ref/vk/vk_sprite.h +++ b/ref/vk/vk_sprite.h @@ -10,3 +10,7 @@ void Mod_LoadSpriteModel( model_t *mod, const void *buffer, qboolean *loaded, ui void R_VkSpriteDrawModel( cl_entity_t *e, float blend ); qboolean R_SpriteInit(void); +void R_SpriteShutdown(void); + +// FIXME needed to recreate the sprite quad model, otherwise its memory will be freed, reused and corrupted +void R_SpriteNewMapFIXME(void); diff --git a/ref/vk/vk_staging.c b/ref/vk/vk_staging.c index f675c290..b87f0f51 100644 --- a/ref/vk/vk_staging.c +++ b/ref/vk/vk_staging.c @@ -8,6 +8,8 @@ #include +#define MODULE_NAME "staging" + #define DEFAULT_STAGING_SIZE (128*1024*1024) #define MAX_STAGING_ALLOCS (2048) #define MAX_CONCURRENT_FRAMES 2 @@ -62,12 +64,12 @@ qboolean R_VkStagingInit(void) { R_FlippingBuffer_Init(&g_staging.buffer_alloc, DEFAULT_STAGING_SIZE); - R_SpeedsRegisterMetric(&g_staging.stats.total_size, "staging_total_size", kSpeedsMetricBytes); - R_SpeedsRegisterMetric(&g_staging.stats.buffers_size, "staging_buffers_size", kSpeedsMetricBytes); - R_SpeedsRegisterMetric(&g_staging.stats.images_size, "staging_images_size", kSpeedsMetricBytes); + R_SPEEDS_COUNTER(g_staging.stats.total_size, "total_size", kSpeedsMetricBytes); + R_SPEEDS_COUNTER(g_staging.stats.buffers_size, "buffers_size", kSpeedsMetricBytes); + R_SPEEDS_COUNTER(g_staging.stats.images_size, 
"images_size", kSpeedsMetricBytes); - R_SpeedsRegisterMetric(&g_staging.stats.buffer_chunks, "staging_buffer_chunks", kSpeedsMetricCount); - R_SpeedsRegisterMetric(&g_staging.stats.images, "staging_images", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_staging.stats.buffer_chunks, "buffer_chunks", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_staging.stats.images, "images", kSpeedsMetricCount); g_staging.buffer_upload_scope_id = R_VkGpuScope_Register("staging_buffers"); g_staging.image_upload_scope_id = R_VkGpuScope_Register("staging_images"); diff --git a/ref/vk/vk_studio.c b/ref/vk/vk_studio.c index aa2c3b37..2196c61a 100644 --- a/ref/vk/vk_studio.c +++ b/ref/vk/vk_studio.c @@ -4,12 +4,14 @@ #include "vk_textures.h" #include "vk_render.h" #include "vk_geometry.h" -#include "vk_previous_frame.h" #include "vk_renderstate.h" #include "vk_math.h" #include "vk_cvar.h" #include "camera.h" #include "r_speeds.h" +#include "vk_studio_model.h" +#include "vk_entity_data.h" +#include "vk_logs.h" #include "xash3d_mathlib.h" #include "const.h" @@ -25,6 +27,9 @@ #include #include +#define MODULE_NAME "studio" +#define LOG_MODULE LogModule_Studio + #define EVENT_CLIENT 5000 // less than this value it's a server-side studio events #define MAX_LOCALLIGHTS 4 @@ -46,7 +51,7 @@ cvar_t r_shadows = { (char*)"r_shadows", (char*)"0", 0 }; typedef struct sortedmesh_s { - mstudiomesh_t *mesh; + const mstudiomesh_t *mesh; int flags; // face flags } sortedmesh_t; @@ -78,8 +83,6 @@ typedef struct vec3_t norms[MAXSTUDIOVERTS]; vec3_t tangents[MAXSTUDIOVERTS]; - vec3_t prev_verts[MAXSTUDIOVERTS]; // last frame state for motion vectors - // lighting state float ambientlight; float shadelight; @@ -110,10 +113,6 @@ typedef struct // playermodels player_model_t player_models[MAX_CLIENTS]; - - // drawelements renderer - uint numverts; - uint numelems; } studio_draw_state_t; // studio-related cvars @@ -124,6 +123,9 @@ static studio_draw_state_t g_studio; // global studio state static struct { int 
models_count; + int submodels_total; + int submodels_static; + int submodels_dynamic; } g_studio_stats; // global variables @@ -135,7 +137,7 @@ studiohdr_t *m_pStudioHeader; float m_flGaitMovement; int g_iBackFaceCull; int g_nTopColor, g_nBottomColor; // remap colors -int g_nFaceFlags, g_nForceFaceFlags; +int g_nForceFaceFlags; // FIXME VK this should be promoted to somewhere global-ish, and done properly // For now it's just a hack to get studio models to compile basically @@ -145,19 +147,10 @@ static struct { model_t *currentmodel; } RI; -void R_StudioInit( void ) -{ - Matrix3x4_LoadIdentity( g_studio.rotationmatrix ); - - // g-cont. cvar disabled by Valve -// gEngine.Cvar_RegisterVariable( &r_shadows ); - - g_studio.interpolate = true; - g_studio.framecount = 0; - m_fDoRemap = false; - - R_SpeedsRegisterMetric(&g_studio_stats.models_count, "models_studio", kSpeedsMetricCount); -} +static struct { + r_studio_entity_model_t *entmodel; + int bodypart_index; +} g_studio_current; /* ================ @@ -260,7 +253,7 @@ static qboolean R_StudioComputeBBox( vec3_t bbox[8] ) return true; // visible } -void R_StudioComputeSkinMatrix( mstudioboneweight_t *boneweights, matrix3x4 *worldtransform, matrix3x4 result ) +void R_StudioComputeSkinMatrix( const mstudioboneweight_t *boneweights, matrix3x4 *worldtransform, matrix3x4 result ) { float flWeight0, flWeight1, flWeight2, flWeight3; int i, numbones = 0; @@ -353,150 +346,12 @@ void R_StudioComputeSkinMatrix( mstudioboneweight_t *boneweights, matrix3x4 *wor } } -/* -=============== -pfnGetCurrentEntity - -=============== -*/ -static cl_entity_t *pfnGetCurrentEntity( void ) -{ - return RI.currententity; -} - -/* -=============== -pfnPlayerInfo - -=============== -*/ -player_info_t *pfnPlayerInfo( int index ) -{ - if( !RI.drawWorld ) - index = -1; - - return gEngine.pfnPlayerInfo( index ); -} - -/* -=============== -pfnMod_ForName - -=============== -*/ -static model_t *pfnMod_ForName( const char *model, int crash ) -{ - return 
gEngine.Mod_ForName( model, crash, false ); -} - -/* -=============== -pfnGetPlayerState - -=============== -*/ -entity_state_t *R_StudioGetPlayerState( int index ) -{ - if( !RI.drawWorld ) - return &RI.currententity->curstate; - - return gEngine.pfnGetPlayerState( index ); -} - -/* -=============== -pfnGetViewEntity - -=============== -*/ -static cl_entity_t *pfnGetViewEntity( void ) -{ - return gEngine.GetViewModel(); -} - -static void pfnGetEngineTimes( int *framecount, double *current, double *old ) -{ - /* FIXME VK NOT IMPLEMENTED */ - /* if( framecount ) *framecount = tr.realframecount; */ - if( framecount ) *framecount = 0; - if( current ) *current = gpGlobals->time; - if( old ) *old = gpGlobals->oldtime; -} - -static void pfnGetViewInfo( float *origin, float *upv, float *rightv, float *forwardv ) -{ - if( origin ) VectorCopy( g_camera.vieworg, origin ); - if( forwardv ) VectorCopy( g_camera.vforward, forwardv ); - if( rightv ) VectorCopy( g_camera.vright, rightv ); - if( upv ) VectorCopy( g_camera.vup, upv ); -} static model_t *R_GetChromeSprite( void ) { return gEngine.GetDefaultSprite( REF_CHROME_SPRITE ); } -static int fixme_studio_models_drawn; - -static void pfnGetModelCounters( int **s, int **a ) -{ - *s = &g_studio.framecount; - - /* FIXME VK NOT IMPLEMENTED */ - /* *a = &r_stats.c_studio_models_drawn; */ - *a = &fixme_studio_models_drawn; -} - -static void pfnGetAliasScale( float *x, float *y ) -{ - if( x ) *x = 1.0f; - if( y ) *y = 1.0f; -} - -static float ****pfnStudioGetBoneTransform( void ) -{ - return (float ****)g_studio.bonestransform; -} - -/* -=============== -pfnStudioGetLightTransform - -=============== -*/ -static float ****pfnStudioGetLightTransform( void ) -{ - return (float ****)g_studio.lighttransform; -} - -/* -=============== -pfnStudioGetAliasTransform - -=============== -*/ -static float ***pfnStudioGetAliasTransform( void ) -{ - return NULL; -} - -/* -=============== -pfnStudioGetRotationMatrix - -=============== -*/ -static 
float ***pfnStudioGetRotationMatrix( void ) -{ - return (float ***)g_studio.rotationmatrix; -} - -/* -==================== -StudioPlayerBlend - -==================== -*/ void R_StudioPlayerBlend( mstudioseqdesc_t *pseqdesc, int *pBlend, float *pPitch ) { // calc up/down pointing @@ -521,12 +376,6 @@ void R_StudioPlayerBlend( mstudioseqdesc_t *pseqdesc, int *pBlend, float *pPitch } } -/* -==================== -R_StudioLerpMovement - -==================== -*/ void R_StudioLerpMovement( cl_entity_t *e, double time, vec3_t origin, vec3_t angles ) { float f = 1.0f; @@ -552,12 +401,6 @@ void R_StudioLerpMovement( cl_entity_t *e, double time, vec3_t origin, vec3_t an else VectorCopy( e->curstate.angles, angles ); } -/* -==================== -StudioSetUpTransform - -==================== -*/ void R_StudioSetUpTransform( cl_entity_t *e ) { vec3_t origin, angles; @@ -588,12 +431,6 @@ void R_StudioSetUpTransform( cl_entity_t *e ) /* } */ } -/* -==================== -StudioEstimateFrame - -==================== -*/ float R_StudioEstimateFrame( cl_entity_t *e, mstudioseqdesc_t *pseqdesc, double time ) { double dfdt, f; @@ -625,12 +462,6 @@ float R_StudioEstimateFrame( cl_entity_t *e, mstudioseqdesc_t *pseqdesc, double return f; } -/* -==================== -StudioEstimateInterpolant - -==================== -*/ float R_StudioEstimateInterpolant( cl_entity_t *e ) { float dadt = 1.0f; @@ -644,41 +475,6 @@ float R_StudioEstimateInterpolant( cl_entity_t *e ) return dadt; } -/* -==================== -CL_GetSequenceDuration - -==================== -*/ -float CL_GetSequenceDuration( cl_entity_t *ent, int sequence ) -{ - studiohdr_t *pstudiohdr; - mstudioseqdesc_t *pseqdesc; - - if( ent->model != NULL && ent->model->type == mod_studio ) - { - pstudiohdr = (studiohdr_t *)gEngine.Mod_Extradata( mod_studio, ent->model ); - - if( pstudiohdr ) - { - sequence = bound( 0, sequence, pstudiohdr->numseq - 1 ); - pseqdesc = (mstudioseqdesc_t *)((byte *)pstudiohdr + pstudiohdr->seqindex) + sequence; - 
- if( pseqdesc->numframes > 1 && pseqdesc->fps > 0 ) - return (float)pseqdesc->numframes / (float)pseqdesc->fps; - } - } - - return 0.1f; -} - - -/* -==================== -StudioFxTransform - -==================== -*/ void R_StudioFxTransform( cl_entity_t *ent, matrix3x4 transform ) { switch( ent->curstate.renderfx ) @@ -717,12 +513,6 @@ void R_StudioFxTransform( cl_entity_t *ent, matrix3x4 transform ) } } -/* -==================== -StudioCalcBoneAdj - -==================== -*/ void R_StudioCalcBoneAdj( float dadt, float *adj, const byte *pcontroller1, const byte *pcontroller2, byte mouthopen ) { mstudiobonecontroller_t *pbonecontroller; @@ -782,12 +572,6 @@ void R_StudioCalcBoneAdj( float dadt, float *adj, const byte *pcontroller1, cons } } -/* -==================== -StudioCalcRotations - -==================== -*/ void R_StudioCalcRotations( cl_entity_t *e, float pos[][3], vec4_t *q, mstudioseqdesc_t *pseqdesc, mstudioanim_t *panim, float f ) { int i, frame; @@ -829,12 +613,6 @@ void R_StudioCalcRotations( cl_entity_t *e, float pos[][3], vec4_t *q, mstudiose if( pseqdesc->motiontype & STUDIO_Z ) pos[pseqdesc->motionbone][2] = 0.0f; } -/* -==================== -StudioMergeBones - -==================== -*/ void R_StudioMergeBones( cl_entity_t *e, model_t *m_pSubModel ) { int i, j; @@ -889,12 +667,6 @@ void R_StudioMergeBones( cl_entity_t *e, model_t *m_pSubModel ) } } -/* -==================== -StudioSetupBones - -==================== -*/ void R_StudioSetupBones( cl_entity_t *e ) { float f; @@ -1046,12 +818,6 @@ void R_StudioSetupBones( cl_entity_t *e ) } } -/* -==================== -StudioSaveBones - -==================== -*/ static void R_StudioSaveBones( void ) { mstudiobone_t *pbones; @@ -1248,12 +1014,6 @@ void R_StudioGenerateNormals( void ) } } -/* -==================== -StudioSetupChrome - -==================== -*/ void R_StudioSetupChrome( float *pchrome, int bone, vec3_t normal ) { float n; @@ -1291,12 +1051,6 @@ void R_StudioSetupChrome( float *pchrome, 
int bone, vec3_t normal ) pchrome[1] = (n + 1.0f) * 32.0f; } -/* -==================== -StudioCalcAttachments - -==================== -*/ static void R_StudioCalcAttachments( void ) { mstudioattachment_t *pAtt; @@ -1311,12 +1065,6 @@ static void R_StudioCalcAttachments( void ) } } -/* -=============== -pfnStudioSetupModel - -=============== -*/ static void R_StudioSetupModel( int bodypart, void **ppbodypart, void **ppsubmodel ) { int index; @@ -1324,6 +1072,8 @@ static void R_StudioSetupModel( int bodypart, void **ppbodypart, void **ppsubmod if( bodypart > m_pStudioHeader->numbodyparts ) bodypart = 0; + g_studio_current.bodypart_index = bodypart; + m_pBodyPart = (mstudiobodyparts_t *)((byte *)m_pStudioHeader + m_pStudioHeader->bodypartindex) + bodypart; index = RI.currententity->curstate.body / m_pBodyPart->base; @@ -1335,12 +1085,6 @@ static void R_StudioSetupModel( int bodypart, void **ppbodypart, void **ppsubmod if( ppsubmodel ) *ppsubmodel = m_pSubModel; } -/* -=============== -R_StudioCheckBBox - -=============== -*/ static int R_StudioCheckBBox( void ) { if( !RI.currententity || !RI.currentmodel ) @@ -1349,12 +1093,6 @@ static int R_StudioCheckBBox( void ) return R_StudioComputeBBox( NULL ); } -/* -=============== -R_StudioDynamicLight - -=============== -*/ void R_StudioDynamicLight( cl_entity_t *ent, alight_t *plight ) { movevars_t *mv = gEngine.pfnGetMoveVars(); @@ -1770,7 +1508,7 @@ static void R_StudioSetColorBegin(const short *ptricmds, const vec3_t *pstudiono R_StudioSetColorArray( ptricmds, pstudionorms, out_color ); } -void R_LightStrength( int bone, vec3_t localpos, vec4_t light[MAX_LOCALLIGHTS] ) +void R_LightStrength( int bone, const vec3_t localpos, vec4_t light[MAX_LOCALLIGHTS] ) { int i; @@ -1915,71 +1653,82 @@ static int R_StudioMeshCompare( const void *a, const void *b ) return 0; } -static void R_StudioDrawNormalMesh( short *ptricmds, vec3_t *pstudionorms, float s, float t, int texture ) -{ - float *lv; - int i; - int num_vertices = 0, 
num_indices = 0; - vk_vertex_t *dst_vtx; - uint16_t *dst_idx; - uint32_t vertex_offset = 0, index_offset = 0; - short* const ptricmds_initial = ptricmds; - r_geometry_buffer_lock_t buffer; +static void addVerticesIndicesCounts( const short *ptricmds, int *num_vertices, int *num_indices ) { + int i; - // Compute counts of vertices and indices - while(( i = *( ptricmds++ ))) - { + while(( i = *( ptricmds++ ))) { enum { FAN, STRIP } mode = i < 0 ? FAN : STRIP; const int vertices = mode == FAN ? -i : i; ASSERT(vertices > 2); - num_vertices += vertices; - num_indices += (vertices-2) * 3; + *num_vertices += vertices; + *num_indices += (vertices-2) * 3; ptricmds += 4 * vertices; } +} +typedef struct { + const short *ptricmds; + const vec3_t *pstudionorms; + const vec3_t *prev_verts; + + float s, t; + int texture; + int face_flags; + + uint32_t vertices_offset; + uint32_t indices_offset; + + vk_vertex_t *dst_vertices; + uint16_t *dst_indices; + vk_render_geometry_t *out_geometry; + + int *out_vertices_count; + int *out_indices_count; +} build_submodel_mesh_t; + +static void buildSubmodelMeshGeometry( build_submodel_mesh_t args ) { + int i; + uint32_t vertex_offset = 0, index_offset = 0; + + int num_vertices = 0, num_indices = 0; + addVerticesIndicesCounts(args.ptricmds, &num_vertices, &num_indices); ASSERT(num_vertices > 0); ASSERT(num_indices > 0); - // Get buffer region for vertices and indices - if (!R_GeometryBufferAllocAndLock( &buffer, num_vertices, num_indices, LifetimeSingleFrame )) { - gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for studio model\n"); - return; - } - - dst_vtx = buffer.vertices.ptr; - dst_idx = buffer.indices.ptr; + vk_vertex_t *dst_vtx = args.dst_vertices; + uint16_t *dst_idx = args.dst_indices; // Restore ptricmds and upload vertices - ptricmds = ptricmds_initial; - while(( i = *( ptricmds++ ))) + while(( i = *( args.ptricmds++ ))) { enum { FAN, STRIP } mode = i < 0 ? FAN : STRIP; const int vertices = mode == FAN ? 
-i : i; uint32_t elements = 0; - for(int j = 0; j < vertices ; ++j, ++dst_vtx, ptricmds += 4 ) + for(int j = 0; j < vertices ; ++j, ++dst_vtx, args.ptricmds += 4 ) { - ASSERT((((vk_vertex_t*)buffer.vertices.ptr) + num_vertices) > dst_vtx); + const int vi = args.ptricmds[0]; *dst_vtx = (vk_vertex_t){0}; - VectorCopy(g_studio.verts[ptricmds[0]], dst_vtx->pos); - VectorCopy(g_studio.prev_verts[ptricmds[0]], dst_vtx->prev_pos); - VectorCopy(g_studio.norms[ptricmds[0]], dst_vtx->normal); - VectorCopy(g_studio.tangents[ptricmds[0]], dst_vtx->tangent); + VectorCopy(g_studio.verts[vi], dst_vtx->pos); + VectorCopy(args.prev_verts[vi], dst_vtx->prev_pos); + + VectorCopy(g_studio.norms[vi], dst_vtx->normal); + VectorCopy(g_studio.tangents[vi], dst_vtx->tangent); dst_vtx->lm_tc[0] = dst_vtx->lm_tc[1] = 0.f; - if (FBitSet( g_nFaceFlags, STUDIO_NF_CHROME )) + if (FBitSet( args.face_flags, STUDIO_NF_CHROME )) { // FIXME also support glow mode - const int idx = ptricmds[1]; - dst_vtx->gl_tc[0] = g_studio.chrome[idx][0] * s; - dst_vtx->gl_tc[1] = g_studio.chrome[idx][1] * t; + const int idx = args.ptricmds[1]; + dst_vtx->gl_tc[0] = g_studio.chrome[idx][0] * args.s; + dst_vtx->gl_tc[1] = g_studio.chrome[idx][1] * args.t; } else { - dst_vtx->gl_tc[0] = ptricmds[2] * s; - dst_vtx->gl_tc[1] = ptricmds[3] * t; + dst_vtx->gl_tc[0] = args.ptricmds[2] * args.s; + dst_vtx->gl_tc[1] = args.ptricmds[3] * args.t; } - R_StudioSetColorBegin( ptricmds, pstudionorms, dst_vtx->color ); + R_StudioSetColorBegin( args.ptricmds, args.pstudionorms, dst_vtx->color ); if (j > 1) { switch (mode) { @@ -2019,26 +1768,21 @@ static void R_StudioDrawNormalMesh( short *ptricmds, vec3_t *pstudionorms, float ASSERT(index_offset == num_indices); ASSERT(vertex_offset == num_vertices); - R_GeometryBufferUnlock( &buffer ); + *args.out_geometry = (vk_render_geometry_t){ + .texture = args.texture, + .material = FBitSet( args.face_flags, STUDIO_NF_CHROME ) ? 
kXVkMaterialChrome : kXVkMaterialRegular, - // Render - { - const vk_render_geometry_t geometry = { - //.lightmap = tglob.whiteTexture, - .texture = texture, - .material = FBitSet( g_nFaceFlags, STUDIO_NF_CHROME ) ? kXVkMaterialChrome : kXVkMaterialRegular, + .vertex_offset = args.vertices_offset, + .max_vertex = num_vertices, - .vertex_offset = buffer.vertices.unit_offset, - .max_vertex = num_vertices, + .index_offset = args.indices_offset, + .element_count = num_indices, - .index_offset = buffer.indices.unit_offset, - .element_count = num_indices, + .emissive = {0, 0, 0}, + }; - .emissive = {0, 0, 0}, - }; - - VK_RenderModelDynamicAddGeometry( &geometry ); - } + *args.out_vertices_count += num_vertices; + *args.out_indices_count += num_indices; } /* FIXME VK @@ -2128,41 +1872,32 @@ static vk_render_type_e studioRenderModeToRenderType( int render_mode ) { return kVkRenderTypeSolid; } -static void R_StudioDrawPoints( void ) -{ - int i, j, k, m_skinnum; - float shellscale = 0.0f; - qboolean need_sort = false; - byte *pvertbone; - byte *pnormbone; - vec3_t *pstudioverts; - vec3_t *pstudionorms; - mstudiotexture_t *ptexture; - mstudiomesh_t *pmesh; - short *pskinref; - float lv_tmp; +typedef struct { + //const mstudiomodel_t *submodel; + const r_geometry_range_t *geometry; + vk_render_geometry_t *geometries; + int vertex_count, index_count; + const vec3_t *prev_verts; +} build_submodel_geometry_t; - if( !m_pStudioHeader ) return; +static void buildStudioSubmodelGeometry(build_submodel_geometry_t args) { + // FIXME: do not reference global things like RI.* m_pStudio* here, pass everything by args + const r_geometry_range_lock_t geom_lock = R_GeometryRangeLock(args.geometry); + ASSERT(geom_lock.vertices); + ASSERT(geom_lock.indices); - vec4_t color = {1, 1, 1, g_studio.blend}; - if (g_studio.rendermode2 == kRenderTransAdd) { - Vector4Set(color, g_studio.blend, g_studio.blend, g_studio.blend, 1.f); - } - VK_RenderModelDynamicBegin( 
studioRenderModeToRenderType(RI.currententity->curstate.rendermode), color, g_studio.rotationmatrix, "%s", m_pSubModel->name ); - - g_studio.numverts = g_studio.numelems = 0; + // FIXME VK entity->curstate.skin can potentially be animated // safety bounding the skinnum - m_skinnum = bound( 0, RI.currententity->curstate.skin, ( m_pStudioHeader->numskinfamilies - 1 )); - ptexture = (mstudiotexture_t *)((byte *)m_pStudioHeader + m_pStudioHeader->textureindex); - pvertbone = ((byte *)m_pStudioHeader + m_pSubModel->vertinfoindex); - pnormbone = ((byte *)m_pStudioHeader + m_pSubModel->norminfoindex); + const int m_skinnum = bound( 0, RI.currententity->curstate.skin, ( m_pStudioHeader->numskinfamilies - 1 )); + const mstudiotexture_t *const ptexture = (const mstudiotexture_t *)((const byte *)m_pStudioHeader + m_pStudioHeader->textureindex); + const byte *const pvertbone = ((const byte *)m_pStudioHeader + m_pSubModel->vertinfoindex); + const byte *pnormbone = ((const byte *)m_pStudioHeader + m_pSubModel->norminfoindex); - pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex); - pstudioverts = (vec3_t *)((byte *)m_pStudioHeader + m_pSubModel->vertindex); - pstudionorms = (vec3_t *)((byte *)m_pStudioHeader + m_pSubModel->normindex); + const vec3_t *pstudioverts = (const vec3_t *)((const byte *)m_pStudioHeader + m_pSubModel->vertindex); + const vec3_t *pstudionorms = (const vec3_t *)((const byte *)m_pStudioHeader + m_pSubModel->normindex); - pskinref = (short *)((byte *)m_pStudioHeader + m_pStudioHeader->skinindex); + const short *pskinref = (short *)((byte *)m_pStudioHeader + m_pStudioHeader->skinindex); if( m_skinnum != 0 ) pskinref += (m_skinnum * m_pStudioHeader->numskinref); // Compute inverse entity matrix, as we need vertices to be in local model space instead of global world space. 
@@ -2173,11 +1908,11 @@ static void R_StudioDrawPoints( void ) if( FBitSet( m_pStudioHeader->flags, STUDIO_HAS_BONEWEIGHTS ) && m_pSubModel->blendvertinfoindex != 0 && m_pSubModel->blendnorminfoindex != 0 ) { - mstudioboneweight_t *pvertweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendvertinfoindex); - mstudioboneweight_t *pnormweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendnorminfoindex); - matrix3x4 skinMat; + const mstudioboneweight_t *const pvertweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendvertinfoindex); + const mstudioboneweight_t *const pnormweight = (mstudioboneweight_t *)((byte *)m_pStudioHeader + m_pSubModel->blendnorminfoindex); + matrix3x4 skinMat; - for( i = 0; i < m_pSubModel->numverts; i++ ) + for( int i = 0; i < m_pSubModel->numverts; i++ ) { R_StudioComputeSkinMatrix( &pvertweight[i], g_studio.worldtransform, skinMat ); @@ -2188,15 +1923,7 @@ static void R_StudioDrawPoints( void ) R_LightStrength( pvertbone[i], pstudioverts[i], g_studio.lightpos[i] ); } - R_PrevFrame_SaveCurrentBoneTransforms( RI.currententity->index, g_studio.worldtransform, rotationmatrix_inv); - matrix3x4* prev_bones_transforms = R_PrevFrame_BoneTransforms( RI.currententity->index ); - for( i = 0; i < m_pSubModel->numverts; i++ ) - { - R_StudioComputeSkinMatrix( &pvertweight[i], prev_bones_transforms, skinMat ); - Matrix3x4_VectorTransform( skinMat, pstudioverts[i], g_studio.prev_verts[i] ); - } - - for( i = 0; i < m_pSubModel->numnorms; i++ ) + for( int i = 0; i < m_pSubModel->numnorms; i++ ) { R_StudioComputeSkinMatrix( &pnormweight[i], g_studio.worldtransform, skinMat ); @@ -2207,21 +1934,17 @@ static void R_StudioDrawPoints( void ) } else { - R_PrevFrame_SaveCurrentBoneTransforms( RI.currententity->index, g_studio.bonestransform, rotationmatrix_inv ); - - matrix3x4* prev_bones_transforms = R_PrevFrame_BoneTransforms( RI.currententity->index ); - for( i = 0; i < m_pSubModel->numverts; 
i++ ) + for( int i = 0; i < m_pSubModel->numverts; i++ ) { vec3_t v; Matrix3x4_VectorTransform( g_studio.bonestransform[pvertbone[i]], pstudioverts[i], v); Matrix3x4_VectorTransform( rotationmatrix_inv, v, g_studio.verts[i] ); R_LightStrength( pvertbone[i], pstudioverts[i], g_studio.lightpos[i] ); - - Matrix3x4_VectorTransform( prev_bones_transforms[pvertbone[i]], pstudioverts[i], g_studio.prev_verts[i] ); } } // generate shared normals for properly scaling glowing shell + float shellscale = 0.0f; if( RI.currententity->curstate.renderfx == kRenderFxGlowShell ) { float factor = (1.0f / 128.0f); @@ -2231,71 +1954,72 @@ static void R_StudioDrawPoints( void ) R_StudioGenerateNormals(); - for( j = k = 0; j < m_pSubModel->nummesh; j++ ) + const mstudiomesh_t *const pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex); + + qboolean need_sort = false; + for( int j = 0, k = 0; j < m_pSubModel->nummesh; j++ ) { - g_nFaceFlags = ptexture[pskinref[pmesh[j].skinref]].flags | g_nForceFaceFlags; + const int face_flags = ptexture[pskinref[pmesh[j].skinref]].flags | g_nForceFaceFlags; // fill in sortedmesh info - g_studio.meshes[j].flags = g_nFaceFlags; + g_studio.meshes[j].flags = face_flags; g_studio.meshes[j].mesh = &pmesh[j]; - if( FBitSet( g_nFaceFlags, STUDIO_NF_MASKED|STUDIO_NF_ADDITIVE )) + // FIXME VK cannot into "dynamic" blending/alpha-test + if( FBitSet( face_flags, STUDIO_NF_MASKED|STUDIO_NF_ADDITIVE )) need_sort = true; if( RI.currententity->curstate.rendermode == kRenderTransAdd ) { - for( i = 0; i < pmesh[j].numnorms; i++, k++, pstudionorms++, pnormbone++ ) + for( int i = 0; i < pmesh[j].numnorms; i++, k++, pstudionorms++, pnormbone++ ) { // FIXME VK const struct { float blend; } tr = {1.f}; - if( FBitSet( g_nFaceFlags, STUDIO_NF_CHROME )) + if( FBitSet( face_flags, STUDIO_NF_CHROME )) R_StudioSetupChrome( g_studio.chrome[k], *pnormbone, (float *)pstudionorms ); VectorSet( g_studio.lightvalues[k], g_studio.blend, g_studio.blend, g_studio.blend 
); } } else { - for( i = 0; i < pmesh[j].numnorms; i++, k++, pstudionorms++, pnormbone++ ) - { + for( int i = 0; i < pmesh[j].numnorms; i++, k++, pstudionorms++, pnormbone++ ) { + float lv_tmp; if( FBitSet( m_pStudioHeader->flags, STUDIO_HAS_BONEWEIGHTS )) - R_StudioLighting( &lv_tmp, -1, g_nFaceFlags, g_studio.norms[k] ); - else R_StudioLighting( &lv_tmp, *pnormbone, g_nFaceFlags, (float *)pstudionorms ); + R_StudioLighting( &lv_tmp, -1, face_flags, g_studio.norms[k] ); + else R_StudioLighting( &lv_tmp, *pnormbone, face_flags, (float *)pstudionorms ); - if( FBitSet( g_nFaceFlags, STUDIO_NF_CHROME )) + if( FBitSet( face_flags, STUDIO_NF_CHROME )) R_StudioSetupChrome( g_studio.chrome[k], *pnormbone, (float *)pstudionorms ); VectorScale( g_studio.lightcolor, lv_tmp, g_studio.lightvalues[k] ); } } } + /* FIXME VK + * this might potentially break blas update topology if( need_sort ) { // resort opaque and translucent meshes draw order qsort( g_studio.meshes, m_pSubModel->nummesh, sizeof( sortedmesh_t ), R_StudioMeshCompare ); } + */ // NOTE: rewind normals at start - pstudionorms = (vec3_t *)((byte *)m_pStudioHeader + m_pSubModel->normindex); + pstudionorms = (const vec3_t *)((const byte *)m_pStudioHeader + m_pSubModel->normindex); - for( j = 0; j < m_pSubModel->nummesh; j++ ) - { - float oldblend = g_studio.blend; - uint startArrayVerts = g_studio.numverts; - uint startArrayElems = g_studio.numelems; - short *ptricmds; - float s, t; - int texture; + int vertices_offset = 0, indices_offset = 0; + for( int j = 0; j < m_pSubModel->nummesh; j++ ) { + const mstudiomesh_t *const pmesh = g_studio.meshes[j].mesh; + const short *const ptricmds = (short *)((byte *)m_pStudioHeader + pmesh->triindex); - pmesh = g_studio.meshes[j].mesh; - ptricmds = (short *)((byte *)m_pStudioHeader + pmesh->triindex); + const int face_flags = ptexture[pskinref[pmesh->skinref]].flags | g_nForceFaceFlags; - g_nFaceFlags = ptexture[pskinref[pmesh->skinref]].flags | g_nForceFaceFlags; - - s = 1.0f / 
(float)ptexture[pskinref[pmesh->skinref]].width; - t = 1.0f / (float)ptexture[pskinref[pmesh->skinref]].height; + const float s = 1.0f / (float)ptexture[pskinref[pmesh->skinref]].width; + const float t = 1.0f / (float)ptexture[pskinref[pmesh->skinref]].height; /* FIXME VK - if( FBitSet( g_nFaceFlags, STUDIO_NF_MASKED )) + const float oldblend = g_studio.blend; + if( FBitSet( face_flags, STUDIO_NF_MASKED )) { pglEnable( GL_ALPHA_TEST ); pglAlphaFunc( GL_GREATER, 0.5f ); @@ -2303,7 +2027,7 @@ static void R_StudioDrawPoints( void ) if( R_ModelOpaque( RI.currententity->curstate.rendermode )) g_studio.blend = 1.0f; } - else if( FBitSet( g_nFaceFlags, STUDIO_NF_ADDITIVE )) + else if( FBitSet( face_flags, STUDIO_NF_ADDITIVE )) { if( R_ModelOpaque( RI.currententity->curstate.rendermode )) { @@ -2316,21 +2040,41 @@ static void R_StudioDrawPoints( void ) } */ - texture = R_StudioSetupSkin( m_pStudioHeader, pskinref[pmesh->skinref] ); + const int texture = R_StudioSetupSkin( m_pStudioHeader, pskinref[pmesh->skinref] ); - /* FIXME VK if( FBitSet( g_nFaceFlags, STUDIO_NF_CHROME )) + /* FIXME VK if( FBitSet( face_flags, STUDIO_NF_CHROME )) R_StudioDrawChromeMesh( ptricmds, pstudionorms, s, t, shellscale ); - else if( FBitSet( g_nFaceFlags, STUDIO_NF_UV_COORDS )) + else if( FBitSet( face_flags, STUDIO_NF_UV_COORDS )) R_StudioDrawFloatMesh( ptricmds, pstudionorms ); - else*/ R_StudioDrawNormalMesh( ptricmds, pstudionorms, s, t, texture ); + else*/ + + buildSubmodelMeshGeometry((build_submodel_mesh_t){ + .ptricmds = ptricmds, + .pstudionorms = pstudionorms, + .prev_verts = args.prev_verts, + .s = s, + .t = t, + .texture = texture, + .face_flags = face_flags, + .vertices_offset = args.geometry->vertices.unit_offset + vertices_offset, + .indices_offset = args.geometry->indices.unit_offset + indices_offset, + .dst_vertices = geom_lock.vertices + vertices_offset, + .dst_indices = geom_lock.indices + indices_offset, + .out_geometry = args.geometries + j, + .out_vertices_count = 
&vertices_offset, + .out_indices_count = &indices_offset, + }); + + ASSERT(vertices_offset <= args.vertex_count); + ASSERT(indices_offset <= args.index_count); /* FIXME VK - if( FBitSet( g_nFaceFlags, STUDIO_NF_MASKED )) + if( FBitSet( face_flags, STUDIO_NF_MASKED )) { pglAlphaFunc( GL_GREATER, DEFAULT_ALPHATEST ); pglDisable( GL_ALPHA_TEST ); } - else if( FBitSet( g_nFaceFlags, STUDIO_NF_ADDITIVE ) && R_ModelOpaque( RI.currententity->curstate.rendermode )) + else if( FBitSet( face_flags, STUDIO_NF_ADDITIVE ) && R_ModelOpaque( RI.currententity->curstate.rendermode )) { pglDepthMask( GL_TRUE ); pglDisable( GL_BLEND ); @@ -2342,7 +2086,223 @@ static void R_StudioDrawPoints( void ) */ } - VK_RenderModelDynamicCommit(); + R_GeometryRangeUnlock(&geom_lock); +} + +static qboolean studioSubmodelRenderInit(r_studio_submodel_render_t *render_submodel, const mstudiomodel_t *submodel, qboolean is_dynamic) { + // Compute vertex and index counts. + // TODO should this be part of r_studio_model_info_t? 
+ int vertex_count = 0, index_count = 0; + { + const mstudiomesh_t *const pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex); + for(int i = 0; i < submodel->nummesh; i++) { + const short* const ptricmds = (short *)((byte *)m_pStudioHeader + pmesh[i].triindex); + addVerticesIndicesCounts(ptricmds, &vertex_count, &index_count); + } + + ASSERT(vertex_count > 0); + ASSERT(index_count > 0); + } + + // TODO can be coalesced into a single allocation for the entire model + const r_geometry_range_t geometry = R_GeometryRangeAlloc(vertex_count, index_count); + if (geometry.block_handle.size == 0) { + ERR("Unable to allocate %d vertices %d indices for submodel %s", + vertex_count, index_count, submodel->name); + return false; + } + + // TODO can be coalesced + vk_render_geometry_t *const geometries = Mem_Malloc(vk_core.pool, submodel->nummesh * sizeof(*geometries)); + ASSERT(geometries); + + const size_t verts_size = sizeof(vec3_t) * submodel->numverts; + render_submodel->prev_verts = Mem_Malloc(vk_core.pool, verts_size); + memcpy(render_submodel->prev_verts, g_studio.verts, verts_size); + + buildStudioSubmodelGeometry((build_submodel_geometry_t){ + //.submodel = submodel, + .geometry = &geometry, + .geometries = geometries, + .vertex_count = vertex_count, + .index_count = index_count, + .prev_verts = render_submodel->prev_verts, + }); + + render_submodel->geometries = geometries; + render_submodel->geometries_count = submodel->nummesh; + render_submodel->geometry_range = geometry; + render_submodel->vertex_count = vertex_count; + render_submodel->index_count = index_count; + + if (!R_RenderModelCreate(&render_submodel->model, (vk_render_model_init_t){ + .name = submodel->name, + .geometries = geometries, + .geometries_count = submodel->nummesh, + .dynamic = is_dynamic, + })) { + ERR("Unable to create render model for studio submodel %s", submodel->name); + Mem_Free(geometries); + // FIXME everything else leaks ;_; + // FIXME sync up with staging and 
free + memset(render_submodel, 0, sizeof(*render_submodel)); + return false; + } + + return true; +} + +static qboolean studioSubmodelRenderUpdate(const r_studio_submodel_render_t *submodel_render, const mstudiomodel_t *submodel) { + buildStudioSubmodelGeometry((build_submodel_geometry_t){ + //.submodel = submodel_render->key_submodel, + .geometry = &submodel_render->geometry_range, + .geometries = submodel_render->geometries, + .vertex_count = submodel_render->vertex_count, + .index_count = submodel_render->index_count, + .prev_verts = submodel_render->prev_verts, + }); + + // Remember previous frame verts + const size_t verts_size = sizeof(vec3_t) * submodel->numverts; + memcpy(submodel_render->prev_verts, g_studio.verts, verts_size); + + return R_RenderModelUpdate(&submodel_render->model); +} + +static void studioEntityModelDestroy(void *userdata) { + r_studio_entity_model_t *entmodel = (r_studio_entity_model_t*)userdata; + for (int i = 0; i < entmodel->bodyparts_count; ++i) { + r_studio_submodel_render_t *const render = entmodel->bodyparts[i]; + studioSubmodelRenderModelRelease(render); + } + if (entmodel->bodyparts) + Mem_Free(entmodel->bodyparts); +} + +static r_studio_entity_model_t *studioEntityModelCreate(const cl_entity_t *entity) { + r_studio_entity_model_t *const entmodel = Mem_Calloc(vk_core.pool, sizeof(r_studio_entity_model_t)); + + entmodel->studio_header = m_pStudioHeader; + entmodel->bodyparts_count = m_pStudioHeader->numbodyparts; // TODO is this correct number? 
+ entmodel->bodyparts = Mem_Calloc(vk_core.pool, sizeof(*entmodel->bodyparts) * entmodel->bodyparts_count); + + Matrix4x4_LoadIdentity(entmodel->transform); + Matrix4x4_LoadIdentity(entmodel->prev_transform); + Matrix3x4_Copy(entmodel->prev_transform, g_studio.rotationmatrix); + + entmodel->model_info = getStudioModelInfo(entity->model); + ASSERT(entmodel->model_info); + + return entmodel; +} + +static r_studio_entity_model_t *studioEntityModelGet(const cl_entity_t* entity) { + r_studio_entity_model_t *entmodel = (r_studio_entity_model_t*)VK_EntityDataGet(entity); + if (entmodel && entmodel->studio_header == m_pStudioHeader) + return entmodel; + + entmodel = studioEntityModelCreate(entity); + if (!entmodel) { + ERR("Cannot create studio entity model for %s", entity->model->name); + return NULL; + } + + DEBUG("Created studio entity %p model %s: %p (bodyparts=%d)", + entity, entity->model->name, entmodel, entmodel->bodyparts_count); + + VK_EntityDataSet(entity, entmodel, &studioEntityModelDestroy); + return entmodel; +} + +static r_studio_submodel_info_t *studioModelFindSubmodelInfo(void) { + for (int i = 0; i < g_studio_current.entmodel->model_info->submodels_count; ++i) { + r_studio_submodel_info_t *const subinfo = g_studio_current.entmodel->model_info->submodels + i; + if (subinfo->submodel_key == m_pSubModel) + return subinfo; + } + + return NULL; +} + +// Draws current studio model submodel +// Can be called externally, i.e. from game dll. +// Expects m_pStudioHeader, m_pSubModel, RI.currententity, etc to be already set up +static void R_StudioDrawPoints( void ) { + if( !m_pStudioHeader || !m_pSubModel || !m_pSubModel->nummesh) + return; + + ASSERT(g_studio_current.bodypart_index >= 0); + + // Ideally, this "get current entity and model" stuff should happen early, when we're just starting to + // draw this entity/model. 
However, call structure/graph is a bit weird: we start rendering in ref code, + // but relevant states (transform, various headers) are updated only later, and potentially in game dll code. + // So we're forced to do this later here, when it is guaranteed that all the relevant state has been set. + if (!g_studio_current.entmodel) { + g_studio_current.entmodel = studioEntityModelGet(RI.currententity); + Matrix3x4_Copy(g_studio_current.entmodel->transform, g_studio.rotationmatrix); + } + + ASSERT(g_studio_current.bodypart_index >= 0); + ASSERT(g_studio_current.bodypart_index < g_studio_current.entmodel->bodyparts_count); + + r_studio_submodel_render_t *render_submodel = g_studio_current.entmodel->bodyparts[g_studio_current.bodypart_index]; + + // Submodels for bodyparts can potentially change at runtime + if (!render_submodel || render_submodel->_.info->submodel_key != m_pSubModel) { + if (render_submodel) { + // This does happen in practice a lot. Shouldn't be a warning. + DEBUG("Detected bodypart submodel change from %s to %s for model %s entity %p(%d)", render_submodel->_.info->submodel_key->name, m_pSubModel->name, m_pStudioHeader->name, RI.currententity, RI.currententity->index); + + studioSubmodelRenderModelRelease(render_submodel); + render_submodel = g_studio_current.entmodel->bodyparts[g_studio_current.bodypart_index] = NULL; + } + + r_studio_submodel_info_t *const subinfo = studioModelFindSubmodelInfo(); + if (!subinfo) { + ERR("Submodel %s info not found for model %s, this should be impossible", m_pSubModel->name, m_pStudioHeader->name); + return; + } + + render_submodel = g_studio_current.entmodel->bodyparts[g_studio_current.bodypart_index] = studioSubmodelRenderModelAcquire(subinfo); + ASSERT(render_submodel); + ASSERT(render_submodel->_.info); + } + + const qboolean is_dynamic = render_submodel->_.info->is_dynamic; + + if (!render_submodel->geometries) { + if (!studioSubmodelRenderInit(render_submodel, m_pSubModel, is_dynamic)) { + ERR("Unable to init 
studio submodel for %s/%d", RI.currentmodel->name, g_studio_current.bodypart_index); + return; + } + + DEBUG("Initialized studio submodel for %s // %s", RI.currentmodel->name, render_submodel->_.info->submodel_key->name); + } else if (is_dynamic) { + if (!studioSubmodelRenderUpdate(render_submodel, m_pSubModel)) { + ERR("Unable to update studio submodel for %s/%d", RI.currentmodel->name, g_studio_current.bodypart_index); + return; + } + } + + if (is_dynamic) + ++g_studio_stats.submodels_dynamic; + else + ++g_studio_stats.submodels_static; + + vec4_t color = {1, 1, 1, g_studio.blend}; + if (g_studio.rendermode2 == kRenderTransAdd) + Vector4Set(color, g_studio.blend, g_studio.blend, g_studio.blend, 1.f); + + // TODO r_model_draw_t.transform should be matrix3x4 + R_RenderModelDraw(&render_submodel->model, (r_model_draw_t){ + .render_type = studioRenderModeToRenderType(RI.currententity->curstate.rendermode), + .color = &color, + .transform = &g_studio_current.entmodel->transform, + .prev_transform = &g_studio_current.entmodel->prev_transform, + .textures_override = -1, + }); + + ++g_studio_stats.submodels_total; } static void R_StudioSetRemapColors( int newTop, int newBottom ) @@ -2361,6 +2321,8 @@ void R_StudioResetPlayerModels( void ) memset( g_studio.player_models, 0, sizeof( g_studio.player_models )); } +static player_info_t *pfnPlayerInfo( int index ); + static model_t *R_StudioSetupPlayerModel( int index ) { player_info_t *info = gEngine.pfnPlayerInfo( index ); @@ -2514,57 +2476,27 @@ static void R_StudioClientEvents( void ) } } -/* -=============== -R_StudioGetForceFaceFlags - -=============== -*/ int R_StudioGetForceFaceFlags( void ) { return g_nForceFaceFlags; } -/* -=============== -R_StudioSetForceFaceFlags - -=============== -*/ void R_StudioSetForceFaceFlags( int flags ) { g_nForceFaceFlags = flags; } -/* -=============== -pfnStudioSetHeader - -=============== -*/ void R_StudioSetHeader( studiohdr_t *pheader ) { m_pStudioHeader = pheader; m_fDoRemap = 
false; } -/* -=============== -R_StudioSetRenderModel - -=============== -*/ void R_StudioSetRenderModel( model_t *model ) { RI.currentmodel = model; } -/* -=============== -R_StudioSetupRenderer - -=============== -*/ static void R_StudioSetupRenderer( int rendermode ) { studiohdr_t *phdr = m_pStudioHeader; @@ -2728,7 +2660,6 @@ static void R_StudioRenderFinal( void ) R_StudioSetupRenderer( rendermode ); VK_RenderDebugLabelBegin( RI.currentmodel->name ); - for( i = 0; i < m_pStudioHeader->numbodyparts; i++ ) { R_StudioSetupModel( i, (void**)&m_pBodyPart, (void**)&m_pSubModel ); @@ -2737,10 +2668,9 @@ static void R_StudioRenderFinal( void ) R_StudioDrawPoints(); GL_StudioDrawShadow(); } + VK_RenderDebugLabelEnd(); R_StudioRestoreRenderer(); - - VK_RenderDebugLabelEnd(); } void R_StudioRenderModel( void ) @@ -3022,6 +2952,8 @@ static int R_StudioDrawPlayer( int flags, entity_state_t *pplayer ) return 1; } +static entity_state_t *R_StudioGetPlayerState( int index ); + static int R_StudioDrawModel( int flags ) { alight_t lighting; @@ -3117,22 +3049,36 @@ static void R_StudioDrawModelInternal( cl_entity_t *e, int flags ) { VK_RenderDebugLabelBegin( e->model->name ); + // Mark this a new model to draw + g_studio_current.entmodel = NULL; + g_studio_current.bodypart_index = -1; + ++g_studio_stats.models_count; if( !RI.drawWorld ) { if( e->player ) R_StudioDrawPlayer( flags, &e->curstate ); - else R_StudioDrawModel( flags ); + else + R_StudioDrawModel( flags ); } else { // select the properly method if( e->player ) pStudioDraw->StudioDrawPlayer( flags, R_StudioGetPlayerState( e->index - 1 )); - else pStudioDraw->StudioDrawModel( flags ); + else + pStudioDraw->StudioDrawModel( flags ); } + if (g_studio_current.entmodel) { + Matrix4x4_Copy(g_studio_current.entmodel->prev_transform, g_studio_current.entmodel->transform); + } + + // Reset current state, no drawing should happen outside of this function + g_studio_current.entmodel = NULL; + g_studio_current.bodypart_index = -1; 
+ VK_RenderDebugLabelEnd(); } @@ -3420,6 +3366,54 @@ void Mod_StudioUnloadTextures( void *data ) } } +static cl_entity_t *pfnGetCurrentEntity( void ) +{ + return RI.currententity; +} + +static player_info_t *pfnPlayerInfo( int index ) +{ + if( !RI.drawWorld ) + index = -1; + + return gEngine.pfnPlayerInfo( index ); +} + +static model_t *pfnMod_ForName( const char *model, int crash ) +{ + return gEngine.Mod_ForName( model, crash, false ); +} + +static entity_state_t *R_StudioGetPlayerState( int index ) +{ + if( !RI.drawWorld ) + return &RI.currententity->curstate; + + return gEngine.pfnGetPlayerState( index ); +} + +static cl_entity_t *pfnGetViewEntity( void ) +{ + return gEngine.GetViewModel(); +} + +static void pfnGetEngineTimes( int *framecount, double *current, double *old ) +{ + /* FIXME VK NOT IMPLEMENTED */ + /* if( framecount ) *framecount = tr.realframecount; */ + if( framecount ) *framecount = 0; + if( current ) *current = gpGlobals->time; + if( old ) *old = gpGlobals->oldtime; +} + +static void pfnGetViewInfo( float *origin, float *upv, float *rightv, float *forwardv ) +{ + if( origin ) VectorCopy( g_camera.vieworg, origin ); + if( forwardv ) VectorCopy( g_camera.vforward, forwardv ); + if( rightv ) VectorCopy( g_camera.vright, rightv ); + if( upv ) VectorCopy( g_camera.vup, upv ); +} + static model_t *pfnModelHandle( int modelindex ) { return gEngine.pfnGetModelByIndex( modelindex ); @@ -3465,6 +3459,43 @@ static void R_StudioDrawBones( void ) PRINT_NOT_IMPLEMENTED(); } +static int fixme_studio_models_drawn; + +static void pfnGetModelCounters( int **s, int **a ) +{ + *s = &g_studio.framecount; + + /* FIXME VK NOT IMPLEMENTED */ + /* *a = &r_stats.c_studio_models_drawn; */ + *a = &fixme_studio_models_drawn; +} + +static void pfnGetAliasScale( float *x, float *y ) +{ + if( x ) *x = 1.0f; + if( y ) *y = 1.0f; +} + +static float ****pfnStudioGetBoneTransform( void ) +{ + return (float ****)g_studio.bonestransform; +} + +static float 
****pfnStudioGetLightTransform( void ) +{ + return (float ****)g_studio.lighttransform; +} + +static float ***pfnStudioGetAliasTransform( void ) +{ + return NULL; +} + +static float ***pfnStudioGetRotationMatrix( void ) +{ + return (float ***)g_studio.rotationmatrix; +} + static engine_studio_api_t gStudioAPI = { pfnMod_Calloc, @@ -3544,16 +3575,26 @@ void CL_InitStudioAPI( void ) void VK_StudioInit( void ) { - R_StudioInit(); + Matrix3x4_LoadIdentity( g_studio.rotationmatrix ); + + // g-cont. cvar disabled by Valve +// gEngine.Cvar_RegisterVariable( &r_shadows ); + + g_studio.interpolate = true; + g_studio.framecount = 0; + m_fDoRemap = false; + + R_SPEEDS_COUNTER(g_studio_stats.models_count, "models", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_studio_stats.submodels_total, "submodels_total", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_studio_stats.submodels_static, "submodels_static", kSpeedsMetricCount); + R_SPEEDS_COUNTER(g_studio_stats.submodels_dynamic, "submodels_dynamic", kSpeedsMetricCount); + + VK_StudioModelInit(); } void VK_StudioShutdown( void ) { -} - -void Mod_LoadStudioModel( model_t *mod, const void *buffer, qboolean *loaded ) -{ - PRINT_NOT_IMPLEMENTED_ARGS("(%s)", mod->name); + R_StudioCacheClear(); } void VK_StudioDrawModel( cl_entity_t *ent, int render_mode, float blend ) diff --git a/ref/vk/vk_studio.h b/ref/vk/vk_studio.h index 5ef32013..ab367351 100644 --- a/ref/vk/vk_studio.h +++ b/ref/vk/vk_studio.h @@ -9,7 +9,6 @@ struct model_s; void VK_StudioInit( void ); void VK_StudioShutdown( void ); -void Mod_LoadStudioModel( model_t *mod, const void *buffer, qboolean *loaded ); void Mod_StudioLoadTextures( model_t *mod, void *data ); void VK_StudioDrawModel( cl_entity_t *ent, int render_mode, float blend ); @@ -21,3 +20,9 @@ void CL_InitStudioAPI( void ); float R_StudioEstimateFrame( cl_entity_t *e, mstudioseqdesc_t *pseqdesc, double time ); void R_StudioLerpMovement( cl_entity_t *e, double time, vec3_t origin, vec3_t angles ); + +qboolean 
R_StudioModelPreload(model_t *mod); + +void R_StudioCacheClear( void ); + +void R_StudioResetPlayerModels( void ); diff --git a/ref/vk/vk_studio_model.c b/ref/vk/vk_studio_model.c new file mode 100644 index 00000000..962af38c --- /dev/null +++ b/ref/vk/vk_studio_model.c @@ -0,0 +1,266 @@ +#include "vk_studio_model.h" +#include "r_speeds.h" +#include "vk_entity_data.h" +#include "vk_logs.h" + +#include "xash3d_mathlib.h" + +#define MODULE_NAME "studio" +#define LOG_MODULE LogModule_Studio + +typedef struct { + const studiohdr_t *studio_header_key; + r_studio_model_info_t info; +} r_studio_model_info_entry_t; + +static struct { +#define MAX_STUDIO_MODELS 256 + r_studio_model_info_entry_t models[MAX_STUDIO_MODELS]; + int models_count; + + int submodels_cached_dynamic; + int submodels_cached_static; +} g_studio_cache; + +void studioRenderSubmodelDestroy( r_studio_submodel_render_t *submodel ) { + R_RenderModelDestroy(&submodel->model); + R_GeometryRangeFree(&submodel->geometry_range); + if (submodel->geometries) + Mem_Free(submodel->geometries); + if (submodel->prev_verts) + Mem_Free(submodel->prev_verts); +} + +static void studioSubmodelInfoDestroy(r_studio_submodel_info_t *subinfo) { + // Not zero means that something still holds a cached render submodel instance somewhere + ASSERT(subinfo->render_refcount == 0); + + while (subinfo->cached_head) { + r_studio_submodel_render_t *render = subinfo->cached_head; + subinfo->cached_head = subinfo->cached_head->_.next; + studioRenderSubmodelDestroy(render); + } +} + +void R_StudioCacheClear( void ) { + for (int i = 0; i < g_studio_cache.models_count; ++i) { + r_studio_model_info_t *info = &g_studio_cache.models[i].info; + + for (int j = 0; j < info->submodels_count; ++j) + studioSubmodelInfoDestroy(info->submodels + j); + + if (info->submodels) + Mem_Free(info->submodels); + } + g_studio_cache.models_count = 0; + + g_studio_cache.submodels_cached_dynamic = g_studio_cache.submodels_cached_static = 0; +} + +static struct { + 
vec4_t first_q[MAXSTUDIOBONES]; + float first_pos[MAXSTUDIOBONES][3]; + + vec4_t q[MAXSTUDIOBONES]; + float pos[MAXSTUDIOBONES][3]; +} gb; + +static void studioModelCalcBones(int numbones, const mstudiobone_t *pbone, const mstudioanim_t *panim, int frame, float out_pos[][3], vec4_t *out_q) { + for(int b = 0; b < numbones; b++ ) { + // TODO check pbone->bonecontroller, if the bone can be dynamically controlled by entity + float *const adj = NULL; + const float interpolation = 0; + R_StudioCalcBoneQuaternion( frame, interpolation, pbone + b, panim + b, adj, out_q[b] ); + R_StudioCalcBonePosition( frame, interpolation, pbone + b, panim + b, adj, out_pos[b] ); + } +} + +qboolean Vector4CompareEpsilon( const vec4_t vec1, const vec4_t vec2, vec_t epsilon ) +{ + vec_t ax, ay, az, aw; + + ax = fabs( vec1[0] - vec2[0] ); + ay = fabs( vec1[1] - vec2[1] ); + az = fabs( vec1[2] - vec2[2] ); + aw = fabs( vec1[3] - vec2[3] ); + + if(( ax <= epsilon ) && ( ay <= epsilon ) && ( az <= epsilon ) && ( aw <= epsilon )) + return true; + return false; +} + +static qboolean isBoneSame(int b) { + if (!Vector4CompareEpsilon(gb.first_q[b], gb.q[b], 1e-4f)) + return false; + + if (!VectorCompareEpsilon(gb.first_pos[b], gb.pos[b], 1e-4f)) + return false; + + return true; +} + +static void studioModelProcessBonesAnimations(const model_t *const model, const studiohdr_t *const hdr, r_studio_submodel_info_t *submodels, int submodels_count) { + for (int i = 0; i < hdr->numseq; ++i) { + const mstudioseqdesc_t *const pseqdesc = (mstudioseqdesc_t *)((byte *)hdr + hdr->seqindex) + i; + + const mstudiobone_t* const pbone = (mstudiobone_t *)((byte *)hdr + hdr->boneindex); + const mstudioanim_t* const panim = gEngine.R_StudioGetAnim( (studiohdr_t*)hdr, (model_t*)model, (mstudioseqdesc_t*)pseqdesc ); + + // Compute the first frame bones to compare with + studioModelCalcBones(hdr->numbones, pbone, panim, 0, gb.first_pos, gb.first_q); + + // Compute bones for each frame + for (int frame = 1; frame < 
pseqdesc->numframes; ++frame) { + studioModelCalcBones(hdr->numbones, pbone, panim, frame, gb.pos, gb.q); + + // Compare bones for each submodel + for (int si = 0; si < submodels_count; ++si) { + r_studio_submodel_info_t *const subinfo = submodels + si; + + // Once detected as dynamic, there's no point in checking further + if (subinfo->is_dynamic) + continue; + + const mstudiomodel_t *const submodel = subinfo->submodel_key; + const qboolean use_boneweights = FBitSet(hdr->flags, STUDIO_HAS_BONEWEIGHTS) && submodel->blendvertinfoindex != 0 && submodel->blendnorminfoindex != 0; + + if (use_boneweights) { + const mstudioboneweight_t *const pvertweight = (mstudioboneweight_t *)((byte *)hdr + submodel->blendvertinfoindex); + for(int vi = 0; vi < submodel->numverts; vi++) { + for (int bi = 0; bi < 4; ++bi) { + const int8_t bone = pvertweight[vi].bone[bi]; + if (bone == -1) + break; + + subinfo->is_dynamic |= !isBoneSame(bone); + if (subinfo->is_dynamic) + break; + } + if (subinfo->is_dynamic) + break; + } // for submodel verts + + } /* use_boneweights */ else { + const byte *const pvertbone = ((const byte *)hdr + submodel->vertinfoindex); + for(int vi = 0; vi < submodel->numverts; vi++) { + subinfo->is_dynamic |= !isBoneSame(pvertbone[vi]); + if (subinfo->is_dynamic) + break; + } + } // no use_boneweights + } // for all submodels + } // for all frames + } // for all sequences +} + +// Get submodels count and/or fill submodels array +static int studioModelGetSubmodels(const studiohdr_t *hdr, r_studio_submodel_info_t *out_submodels) { + int count = 0; + for (int i = 0; i < hdr->numbodyparts; ++i) { + const mstudiobodyparts_t* const bodypart = (mstudiobodyparts_t *)((byte *)hdr + hdr->bodypartindex) + i; + if (out_submodels) { + DEBUG(" Bodypart %d/%d: %s (nummodels=%d)", i, hdr->numbodyparts - 1, bodypart->name, bodypart->nummodels); + for (int j = 0; j < bodypart->nummodels; ++j) { + const mstudiomodel_t * const submodel = (mstudiomodel_t *)((byte *)hdr + 
bodypart->modelindex) + j; + DEBUG(" Submodel %d: %s", j, submodel->name); + out_submodels[count++].submodel_key = submodel; + } + } else { + count += bodypart->nummodels; + } + } + return count; +} + +qboolean R_StudioModelPreload(model_t *mod) { + const studiohdr_t *const hdr = (const studiohdr_t *)gEngine.Mod_Extradata(mod_studio, mod); + + ASSERT(g_studio_cache.models_count < MAX_STUDIO_MODELS); + + r_studio_model_info_entry_t *entry = &g_studio_cache.models[g_studio_cache.models_count++]; + entry->studio_header_key = hdr; + + DEBUG("Studio model %s, sequences = %d:", hdr->name, hdr->numseq); + for (int i = 0; i < hdr->numseq; ++i) { + const mstudioseqdesc_t *const pseqdesc = (mstudioseqdesc_t *)((byte *)hdr + hdr->seqindex) + i; + DEBUG(" %d: fps=%f numframes=%d", i, pseqdesc->fps, pseqdesc->numframes); + } + + // Get submodel array + const int submodels_count = studioModelGetSubmodels(hdr, NULL); + r_studio_submodel_info_t *submodels = Mem_Calloc(vk_core.pool, sizeof(*submodels) * submodels_count); + studioModelGetSubmodels(hdr, submodels); + + studioModelProcessBonesAnimations(mod, hdr, submodels, submodels_count); + + qboolean is_dynamic = false; + DEBUG(" submodels_count: %d", submodels_count); + for (int i = 0; i < submodels_count; ++i) { + const r_studio_submodel_info_t *const subinfo = submodels + i; + is_dynamic |= subinfo->is_dynamic; + DEBUG(" Submodel %d/%d: name=\"%s\", is_dynamic=%d", i, submodels_count-1, subinfo->submodel_key->name, subinfo->is_dynamic); + } + + entry->info.submodels_count = submodels_count; + entry->info.submodels = submodels; + + return true; +} + +r_studio_model_info_t *getStudioModelInfo(model_t *model) { + const studiohdr_t *const hdr = (studiohdr_t *)gEngine.Mod_Extradata( mod_studio, model ); + + for (int i = 0; i < g_studio_cache.models_count; ++i) { + r_studio_model_info_entry_t *const entry = g_studio_cache.models + i; + if (entry->studio_header_key == hdr) { + return &entry->info; + } + } + + return NULL; +} + +void 
VK_StudioModelInit(void) { + R_SPEEDS_METRIC(g_studio_cache.submodels_cached_static, "submodels_cached_static", kSpeedsMetricCount); + R_SPEEDS_METRIC(g_studio_cache.submodels_cached_dynamic, "submodels_cached_dynamic", kSpeedsMetricCount); +} + +r_studio_submodel_render_t *studioSubmodelRenderModelAcquire(r_studio_submodel_info_t *subinfo) { + r_studio_submodel_render_t *render = NULL; + if (subinfo->cached_head) { + render = subinfo->cached_head; + if (subinfo->is_dynamic) { + subinfo->cached_head = render->_.next; + render->_.next = NULL; + } + subinfo->render_refcount++; + return render; + } + + render = Mem_Calloc(vk_core.pool, sizeof(*render)); + render->_.info = subinfo; + + if (!subinfo->is_dynamic) { + subinfo->cached_head = render; + ++g_studio_cache.submodels_cached_static; + } else { + ++g_studio_cache.submodels_cached_dynamic; + } + + subinfo->render_refcount++; + return render; +} + +void studioSubmodelRenderModelRelease(r_studio_submodel_render_t *render_submodel) { + if (!render_submodel) + return; + + ASSERT(render_submodel->_.info->render_refcount > 0); + render_submodel->_.info->render_refcount--; + + if (!render_submodel->_.info->is_dynamic) + return; + + render_submodel->_.next = render_submodel->_.info->cached_head; + render_submodel->_.info->cached_head = render_submodel; +} diff --git a/ref/vk/vk_studio_model.h b/ref/vk/vk_studio_model.h new file mode 100644 index 00000000..d5b4c9a2 --- /dev/null +++ b/ref/vk/vk_studio_model.h @@ -0,0 +1,67 @@ +#pragma once + +#include "vk_render.h" +#include "vk_geometry.h" + +struct r_studio_submodel_info_s; + +// Submodel render data that is enough to render given submodel +// Includes render model (that also encapsulates BLAS) +// This can be static (built once), or dynamic (updated frequently) +// Lives in per-model-info submodel cache +typedef struct r_studio_submodel_render_s { + vk_render_model_t model; + r_geometry_range_t geometry_range; + vk_render_geometry_t *geometries; + + // TODO figure out 
how to precompute this and store it in info + int geometries_count; + int vertex_count, index_count; + + vec3_t *prev_verts; + + struct { + struct r_studio_submodel_info_s *info; + struct r_studio_submodel_render_s *next; + } _; +} r_studio_submodel_render_t; + +// Submodel metadata and render-model cache +typedef struct r_studio_submodel_info_s { + const mstudiomodel_t *submodel_key; + qboolean is_dynamic; + + // TODO int verts_count; for prev_verts + + r_studio_submodel_render_t *cached_head; + + // Mostly for debug: how many cached render models were acquired and not given back + int render_refcount; +} r_studio_submodel_info_t; + +// Submodel cache functions, used in vk_studio.c +r_studio_submodel_render_t *studioSubmodelRenderModelAcquire(r_studio_submodel_info_t *info); +void studioSubmodelRenderModelRelease(r_studio_submodel_render_t *render_submodel); + +typedef struct { + int submodels_count; + r_studio_submodel_info_t *submodels; +} r_studio_model_info_t; + +r_studio_model_info_t *getStudioModelInfo(model_t *model); + +// Entity model cache/pool +typedef struct { + const studiohdr_t *studio_header; + const r_studio_model_info_t *model_info; + + // TODO 3x4 + matrix4x4 transform; + matrix4x4 prev_transform; + + int bodyparts_count; + r_studio_submodel_render_t **bodyparts; +} r_studio_entity_model_t; + +void VK_StudioModelInit(void); +//void VK_StudioModelShutdown(void); diff --git a/ref/vk/vk_swapchain.c b/ref/vk/vk_swapchain.c index e86184d1..b8970087 100644 --- a/ref/vk/vk_swapchain.c +++ b/ref/vk/vk_swapchain.c @@ -21,6 +21,8 @@ static struct { xvk_image_t depth; uint32_t width, height; + + uint32_t recreate_requested; } g_swapchain = {0}; // TODO move to common @@ -196,11 +198,10 @@ r_vk_swapchain_framebuffer_t R_VkSwapchainAcquire( VkSemaphore sem_image_availa APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__); r_vk_swapchain_framebuffer_t ret = {0}; - qboolean force_recreate = false; - for (int i = 0;; ++i) { + for (;;) { // Check that swapchain has 
the same size - recreateSwapchain(force_recreate); + recreateSwapchain(!!g_swapchain.recreate_requested); APROF_SCOPE_DECLARE_BEGIN_EX(vkAcquireNextImageKHR, "vkAcquireNextImageKHR", APROF_SCOPE_FLAG_WAIT); const VkResult acquire_result = vkAcquireNextImageKHR(vk_core.device, g_swapchain.swapchain, UINT64_MAX, sem_image_available, VK_NULL_HANDLE, &ret.index); @@ -208,6 +209,13 @@ r_vk_swapchain_framebuffer_t R_VkSwapchainAcquire( VkSemaphore sem_image_availa switch (acquire_result) { case VK_SUCCESS: + g_swapchain.recreate_requested = 0; + break; + + case VK_SUBOPTIMAL_KHR: + // Would need to wait on the semaphore here somehow + gEngine.Con_Printf(S_WARN "vkAcquireNextImageKHR returned %s (%0#x), will recreate swapchain for the next frame\n", R_VkResultName(acquire_result), acquire_result); + ++g_swapchain.recreate_requested; break; case VK_ERROR_OUT_OF_HOST_MEMORY: @@ -217,14 +225,24 @@ r_vk_swapchain_framebuffer_t R_VkSwapchainAcquire( VkSemaphore sem_image_availa XVK_CHECK(acquire_result); return ret; - default: - gEngine.Con_Printf(S_WARN "vkAcquireNextImageKHR returned %s (%0#x), recreating swapchain\n", R_VkResultName(acquire_result), acquire_result); - if (i == 0) { - force_recreate = true; - continue; - } - gEngine.Con_Printf(S_WARN "second vkAcquireNextImageKHR failed with %s, frame will be lost\n", R_VkResultName(acquire_result)); + case VK_TIMEOUT: + case VK_NOT_READY: + gEngine.Con_Printf(S_ERROR "vkAcquireNextImageKHR returned %s (%0#x), frame will be lost\n", R_VkResultName(acquire_result), acquire_result); return ret; + + case VK_ERROR_OUT_OF_DATE_KHR: + case VK_ERROR_SURFACE_LOST_KHR: + case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: + default: + gEngine.Con_Printf(S_WARN "vkAcquireNextImageKHR returned %s (%0#x)\n", R_VkResultName(acquire_result), acquire_result); + + if (g_swapchain.recreate_requested) { + gEngine.Con_Printf(S_WARN "second vkAcquireNextImageKHR failed with %s, frame will be lost\n", R_VkResultName(acquire_result)); + return 
ret; + } + + ++g_swapchain.recreate_requested; + continue; } break; @@ -252,12 +270,17 @@ void R_VkSwapchainPresent( uint32_t index, VkSemaphore done ) { }; const VkResult present_result = vkQueuePresentKHR(vk_core.queue, &presinfo); - switch (present_result) - { + switch (present_result) { case VK_ERROR_OUT_OF_DATE_KHR: case VK_ERROR_SURFACE_LOST_KHR: + case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: gEngine.Con_Printf(S_WARN "vkQueuePresentKHR returned %s, frame will be lost\n", R_VkResultName(present_result)); break; + + case VK_SUBOPTIMAL_KHR: + gEngine.Con_Printf(S_WARN "vkQueuePresentKHR returned %s\n", R_VkResultName(present_result)); + break; + default: XVK_CHECK(present_result); } diff --git a/ref/vk/vk_textures.c b/ref/vk/vk_textures.c index a44817d5..6c47bcd1 100644 --- a/ref/vk/vk_textures.c +++ b/ref/vk/vk_textures.c @@ -7,6 +7,7 @@ #include "vk_descriptor.h" #include "vk_mapents.h" // wadlist #include "vk_combuf.h" +#include "vk_logs.h" #include "xash3d_mathlib.h" #include "crtlib.h" @@ -17,6 +18,8 @@ #include #include +#define LOG_MODULE LogModule_Textures + #define TEXTURES_HASH_SIZE (MAX_TEXTURES >> 2) static vk_texture_t vk_textures[MAX_TEXTURES]; @@ -144,7 +147,7 @@ static qboolean Common_CheckTexName( const char *name ) // because multi-layered textures can exceed name string if( len >= sizeof( vk_textures->name )) { - gEngine.Con_Printf( S_ERROR "LoadTexture: too long name %s (%d)\n", name, len ); + ERR("LoadTexture: too long name %s (%d)", name, len ); return false; } @@ -342,7 +345,7 @@ static VkFormat VK_GetFormat(pixformat_t format) { { case PF_RGBA_32: return VK_FORMAT_R8G8B8A8_UNORM; default: - gEngine.Con_Printf(S_WARN "FIXME unsupported pixformat_t %d\n", format); + WARN("FIXME unsupported pixformat_t %d", format); return VK_FORMAT_UNDEFINED; } } @@ -375,7 +378,7 @@ static size_t CalcImageSize( pixformat_t format, int width, int height, int dept size = (((width + 3) >> 2) * ((height + 3) >> 2) * 16) * depth; break; default: - 
gEngine.Con_Printf(S_ERROR "unsupported pixformat_t %d\n", format); + ERR("unsupported pixformat_t %d", format); ASSERT(!"Unsupported format encountered"); } @@ -529,7 +532,7 @@ static VkSampler pickSamplerForFlags( texFlags_t flags ) { return tglob.samplers[i].sampler; } - gEngine.Con_Printf(S_ERROR "Couldn't find/allocate sampler for flags %x\n", flags); + ERR("Couldn't find/allocate sampler for flags %x", flags); return tglob.default_sampler_fixme; } @@ -540,8 +543,9 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers, // TODO non-rbga textures for (int i = 0; i < num_layers; ++i) { + // FIXME create empty black texture if there's no buffer if (!layers[i]->buffer) { - gEngine.Con_Printf(S_ERROR "Texture %s layer %d missing buffer\n", tex->name, i); + ERR("Texture %s layer %d missing buffer", tex->name, i); return false; } @@ -549,18 +553,18 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers, continue; if (layers[0]->type != layers[i]->type) { - gEngine.Con_Printf(S_ERROR "Texture %s layer %d has type %d inconsistent with layer 0 type %d\n", tex->name, i, layers[i]->type, layers[0]->type); + ERR("Texture %s layer %d has type %d inconsistent with layer 0 type %d", tex->name, i, layers[i]->type, layers[0]->type); return false; } if (layers[0]->width != layers[i]->width || layers[0]->height != layers[i]->height) { - gEngine.Con_Printf(S_ERROR "Texture %s layer %d has resolution %dx%d inconsistent with layer 0 resolution %dx%d\n", + ERR("Texture %s layer %d has resolution %dx%d inconsistent with layer 0 resolution %dx%d", tex->name, i, layers[i]->width, layers[i]->height, layers[0]->width, layers[0]->height); return false; } if ((layers[0]->flags ^ layers[i]->flags) & IMAGE_HAS_ALPHA) { - gEngine.Con_Printf(S_ERROR "Texture %s layer %d has_alpha=%d inconsistent with layer 0 has_alpha=%d\n", + ERR("Texture %s layer %d has_alpha=%d inconsistent with layer 0 has_alpha=%d", tex->name, i, !!(layers[i]->flags & 
IMAGE_HAS_ALPHA), !!(layers[0]->flags & IMAGE_HAS_ALPHA)); @@ -571,7 +575,7 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers, tex->height = layers[0]->height; mipCount = CalcMipmapCount( tex, true); - gEngine.Con_Reportf("Uploading texture %s, mips=%d, layers=%d\n", tex->name, mipCount, num_layers); + DEBUG("Uploading texture %s, mips=%d, layers=%d", tex->name, mipCount, num_layers); // TODO this vvv // // NOTE: only single uncompressed textures can be resamples, no mips, no layers, no sides @@ -752,7 +756,7 @@ const char* VK_TextureName( unsigned int texnum ) const byte* VK_TextureData( unsigned int texnum ) { - gEngine.Con_Printf(S_WARN "VK FIXME: %s\n", __FUNCTION__); + PRINT_NOT_IMPLEMENTED_ARGS("texnum=%d", texnum); // We don't store original texture data // TODO do we need to? return NULL; @@ -820,19 +824,19 @@ int XVK_LoadTextureReplace( const char *name, const byte *buf, size_t size, int int VK_CreateTexture( const char *name, int width, int height, const void *buffer, texFlags_t flags ) { - gEngine.Con_Printf("VK FIXME: %s\n", __FUNCTION__); + PRINT_NOT_IMPLEMENTED_ARGS("name=%s width=%d height=%d buffer=%p flags=%08x", name, width, height, buffer, flags); return 0; } int VK_LoadTextureArray( const char **names, int flags ) { - gEngine.Con_Printf("VK FIXME: %s\n", __FUNCTION__); + PRINT_NOT_IMPLEMENTED(); return 0; } int VK_CreateTextureArray( const char *name, int width, int height, int depth, const void *buffer, texFlags_t flags ) { - gEngine.Con_Printf("VK FIXME: %s\n", __FUNCTION__); + PRINT_NOT_IMPLEMENTED_ARGS("name=%s width=%d height=%d buffer=%p flags=%08x", name, width, height, buffer, flags); return 0; } @@ -852,7 +856,7 @@ void VK_FreeTexture( unsigned int texnum ) { // debug if( !tex->name[0] ) { - gEngine.Con_Printf( S_ERROR "GL_DeleteTexture: trying to free unnamed texture with index %u\n", texnum ); + ERR("VK_FreeTexture: trying to free unnamed texture with index %u", texnum ); return; } @@ -944,7 +948,7 @@ 
int XVK_TextureLookupF( const char *fmt, ...) { va_end( argptr ); tex_id = VK_FindTexture(buffer); - //gEngine.Con_Reportf("Looked up texture %s -> %d\n", buffer, tex_id); + //DEBUG("Looked up texture %s -> %d", buffer, tex_id); return tex_id; } @@ -1017,7 +1021,7 @@ static qboolean loadSkybox( const char *prefix, int style ) { // release old skybox unloadSkybox(); - gEngine.Con_DPrintf( "SKY: " ); + DEBUG( "SKY: " ); for( i = 0; i < 6; i++ ) { char sidename[MAX_STRING]; @@ -1036,7 +1040,7 @@ static qboolean loadSkybox( const char *prefix, int style ) { img_flags |= IMAGE_FORCE_RGBA; gEngine.Image_Process( &sides[i], 0, 0, img_flags, 0.f ); } - gEngine.Con_DPrintf( "%s%s%s", prefix, g_skybox_info[i].suffix, i != 5 ? ", " : ". " ); + DEBUG( "%s%s%s", prefix, g_skybox_info[i].suffix, i != 5 ? ", " : ". " ); } if( i != 6 ) @@ -1054,10 +1058,10 @@ cleanup: if (success) { tglob.fCustomSkybox = true; - gEngine.Con_DPrintf( "done\n" ); + DEBUG( "Skybox done" ); } else { tglob.skybox_cube.name[0] = '\0'; - gEngine.Con_DPrintf( "^2failed\n" ); + ERR( "Skybox failed" ); unloadSkybox(); } @@ -1093,7 +1097,7 @@ void XVK_SetupSky( const char *skyboxname ) { } if (Q_stricmp(skyboxname, skybox_default) != 0) { - gEngine.Con_Reportf( S_WARN "missed or incomplete skybox '%s'\n", skyboxname ); + WARN("missed or incomplete skybox '%s'", skyboxname); XVK_SetupSky( "desert" ); // force to default } } diff --git a/ref/vk/vk_triapi.c b/ref/vk/vk_triapi.c index af6c1138..f5ac227a 100644 --- a/ref/vk/vk_triapi.c +++ b/ref/vk/vk_triapi.c @@ -136,41 +136,6 @@ static int genTriangleStripIndices(void) { return num_indices; } -static void emitDynamicGeometry(int num_indices, const vec4_t color, const char* name ) { - if (!num_indices) - return; - - r_geometry_buffer_lock_t buffer; - if (!R_GeometryBufferAllocAndLock( &buffer, g_triapi.num_vertices, num_indices, LifetimeSingleFrame )) { - gEngine.Con_Printf(S_ERROR "Cannot allocate geometry for tri api\n"); - return; - } - - 
memcpy(buffer.vertices.ptr, g_triapi.vertices, sizeof(vk_vertex_t) * g_triapi.num_vertices); - memcpy(buffer.indices.ptr, g_triapi.indices, sizeof(uint16_t) * num_indices); - - R_GeometryBufferUnlock( &buffer ); - - { - const vk_render_geometry_t geometry = { - .texture = g_triapi.texture_index, - .material = kXVkMaterialRegular, - - .max_vertex = g_triapi.num_vertices, - .vertex_offset = buffer.vertices.unit_offset, - - .element_count = num_indices, - .index_offset = buffer.indices.unit_offset, - - .emissive = { color[0], color[1], color[2] }, - }; - - VK_RenderModelDynamicBegin( g_triapi.render_type, color, m_matrix4x4_identity, name ); - VK_RenderModelDynamicAddGeometry( &geometry ); - VK_RenderModelDynamicCommit(); - } -} - void TriEnd( void ) { if (!g_triapi.primitive_mode) return; @@ -196,7 +161,19 @@ void TriEndEx( const vec4_t color, const char* name ) { break; } - emitDynamicGeometry(num_indices, color, name); + if (num_indices) { + R_RenderDrawOnce((r_draw_once_t){ + .name = name, + .vertices = g_triapi.vertices, + .indices = g_triapi.indices, + .vertices_count = g_triapi.num_vertices, + .indices_count = num_indices, + .render_type = g_triapi.render_type, + .texture = g_triapi.texture_index, + .emissive = (const vec4_t*)color, + .color = (const vec4_t*)color, + }); + } g_triapi.num_vertices = 0; g_triapi.primitive_mode = 0; @@ -230,6 +207,18 @@ void TriColor4ub_( byte r, byte g, byte b, byte a ) { } void TriColor4f( float r, float g, float b, float a ) { - TriColor4ub_(clampi32(r*255.f, 0, 255),clampi32(g*255.f, 0, 255),clampi32(b*255.f, 0, 255),clampi32(a*255.f, 0, 255)); + TriColor4ub_( + clampi32(r*255.f, 0, 255), + clampi32(g*255.f, 0, 255), + clampi32(b*255.f, 0, 255), + clampi32(a*255.f, 0, 255)); } +void TriNormal3fv( const float *v ) { + TriNormal3f(v[0], v[1], v[2]); +} + +void TriNormal3f( float x, float y, float z ) { + vk_vertex_t *const ve = g_triapi.vertices + g_triapi.num_vertices; + VectorSet(ve->normal, x, y, z); +} diff --git 
a/ref/vk/vk_triapi.h b/ref/vk/vk_triapi.h index a1a38a34..c3a0f2fd 100644 --- a/ref/vk/vk_triapi.h +++ b/ref/vk/vk_triapi.h @@ -14,6 +14,9 @@ void TriTexCoord2f( float u, float v ); void TriColor4f( float r, float g, float b, float a ); void TriColor4ub_( byte r, byte g, byte b, byte a ); // FIXME consolidate with vk_renderstate +void TriNormal3fv( const float *v ); +void TriNormal3f( float x, float y, float z ); + // Emits next vertex void TriVertex3fv( const float *v ); void TriVertex3f( float x, float y, float z );