add ring temp allocator logic

This commit is contained in:
Ivan 'provod' Avdeev 2021-05-03 11:17:01 -07:00
parent 819540d5ec
commit be42bd29a5
8 changed files with 124 additions and 73 deletions

View File

@ -130,12 +130,6 @@ void VK_BrushModelDraw( const cl_entity_t *ent, int render_mode )
if (bmodel->render_model.num_geometries == 0)
return;
if (!tglob.lightmapTextures[0])
{
gEngine.Con_Printf( S_ERROR "Don't have a lightmap texture\n");
return;
}
for (int i = 0; i < bmodel->render_model.num_geometries; ++i) {
texture_t *t = R_TextureAnimation(ent, bmodel->surf[i]);
if (t->gl_texturenum < 0)

View File

@ -48,3 +48,47 @@ void destroyBuffer(vk_buffer_t *buf) {
buf->size = 0;
}
}
// Resets the allocator to an empty state: no permanent region, the write
// position back at the start, and the full buf->size reported as free.
// buf->size itself is left untouched; it is expected to be set by the owner.
void VK_RingBuffer_Clear(vk_ring_buffer_t* buf) {
	buf->permanent_size = 0;
	buf->offset_free = 0;
	buf->free = buf->size;
}
// < v->
// |MAP|.........|FRAME|...|
// ^ XXXXX
//
// Allocates `size` units aligned to `align` and returns the offset of the
// allocated region, or AllocFailed if the request cannot be satisfied.
//
// The region is first placed at the aligned write position (offset_free). If
// it does not fit before the end of the buffer, the rest of the tail is skipped
// and the region is placed at the first aligned position after the permanent
// part instead. In both cases the skipped units (alignment padding and, when
// wrapping, the unused tail) are charged against buf->free together with the
// allocation itself.
//
// On failure `buf` is left unmodified.
//
// NOTE(review): ALIGN_UP is defined elsewhere; it is assumed to round its first
// argument up to the nearest multiple of the second.
uint32_t VK_RingBuffer_Alloc(vk_ring_buffer_t* buf, uint32_t size, uint32_t align) {
	uint32_t offset;
	uint32_t tail;
	uint64_t needed; // 64-bit so that padding + tail + size cannot wrap around

	// A zero alignment is meaningless, and a write position past the end means the state is corrupted
	if (align == 0 || buf->offset_free > buf->size)
		return AllocFailed;

	// Everything from the write position to the end of the buffer
	tail = buf->size - buf->offset_free;
	offset = ALIGN_UP(buf->offset_free, align);

	if (offset >= buf->offset_free && offset <= buf->size && size <= buf->size - offset) {
		// Fits in place: only the alignment padding is wasted
		needed = (uint64_t)(offset - buf->offset_free) + size;
	} else {
		// Does not fit before the end of the buffer (or aligning overflowed):
		// wrap around to the start of the non-permanent part
		offset = ALIGN_UP(buf->permanent_size, align);
		if (offset < buf->permanent_size || offset > buf->size || size > buf->size - offset)
			return AllocFailed;

		// The whole tail is skipped, plus the alignment padding at the new position
		needed = (uint64_t)tail + (offset - buf->permanent_size) + size;
	}

	// Compare before subtracting so that the unsigned free counter never underflows
	if (needed > buf->free)
		return AllocFailed;

	buf->offset_free = offset + size;
	buf->free -= (uint32_t)needed;
	return offset;
}
// Declares everything allocated so far as permanent: the current write
// position becomes the size of the permanent region.
// Asserts that no permanent region has been set yet.
void VK_RingBuffer_Fix(vk_ring_buffer_t* buf) {
	const uint32_t allocated_so_far = buf->offset_free;

	ASSERT(buf->permanent_size == 0);

	buf->permanent_size = allocated_so_far;
}
// Drops everything allocated after the permanent region: the write position
// is rewound to the end of the permanent part and all space beyond it is
// reported as free again.
void VK_RingBuffer_ClearFrame(vk_ring_buffer_t* buf) {
	const uint32_t permanent = buf->permanent_size;

	buf->free = buf->size - permanent;
	buf->offset_free = permanent;
}

View File

@ -5,3 +5,29 @@ qboolean createBuffer(vk_buffer_t *buf, uint32_t size, VkBufferUsageFlags usage,
void destroyBuffer(vk_buffer_t *buf);
// Bookkeeping for a ring allocator over an externally owned buffer.
// Only offsets are tracked here; the structure holds no memory of its own.
// v -- begin of ring buffer|permanent_size
// |XXXMAPLIFETME|<......|FRAME1|FRAME2|FRAMEN|......................>|
// busy pos - ^ ^ ^ ^ -- write pos | offset_free
typedef struct {
// Total capacity; assigned by the owner of the allocator
uint32_t size;
// Size of the leading region that VK_RingBuffer_ClearFrame does not reclaim; zero until VK_RingBuffer_Fix
uint32_t permanent_size;
// Current write position (before alignment) for the next allocation
uint32_t offset_free;
// Amount of space currently considered available for allocation
uint32_t free;
// TODO per-frame offsets for many frames in flight
} vk_ring_buffer_t;
// Sentinel offset returned by VK_RingBuffer_Alloc when an allocation cannot be made
enum { AllocFailed = 0xffffffffu };
// Marks the entire buffer as free and forgets the permanent region
void VK_RingBuffer_Clear(vk_ring_buffer_t* buf);
// Allocates a new aligned region and returns the offset to it (AllocFailed if allocation failed)
uint32_t VK_RingBuffer_Alloc(vk_ring_buffer_t* buf, uint32_t size, uint32_t align);
// Fixes everything that has been allocated since Clear as permanent; the ring buffer will operate on the remainder only
// Can be called only once since Clear
void VK_RingBuffer_Fix(vk_ring_buffer_t* buf);
// Clears the non-permanent part of the buffer
void VK_RingBuffer_ClearFrame(vk_ring_buffer_t* buf);

View File

@ -40,16 +40,11 @@ static struct {
VkPipeline pipelines[kRenderTransAdd + 1];
vk_buffer_t buffer;
uint32_t buffer_free_offset;
uint32_t buffer_frame_begin_offset;
vk_ring_buffer_t buffer_alloc;
vk_buffer_t uniform_buffer;
uint32_t ubo_align;
struct {
int align_holes_size;
} stat;
vk_buffer_alloc_t allocs[MAX_ALLOCS];
int allocs_free[MAX_ALLOCS];
int num_free_allocs;
@ -308,6 +303,8 @@ qboolean VK_RenderInit( void )
resetAllocFreeList();
g_render.buffer_alloc.size = g_render.buffer.size;
return true;
}
@ -323,8 +320,8 @@ void VK_RenderShutdown( void )
vk_buffer_handle_t VK_RenderBufferAlloc( uint32_t unit_size, uint32_t count, vk_lifetime_t lifetime )
{
const uint32_t offset = ALIGN_UP(g_render.buffer_free_offset, unit_size);
const uint32_t alloc_size = unit_size * count;
uint32_t offset;
vk_buffer_alloc_t *alloc;
vk_buffer_handle_t handle = InvalidHandle;
@ -332,14 +329,16 @@ vk_buffer_handle_t VK_RenderBufferAlloc( uint32_t unit_size, uint32_t count, vk_
ASSERT(lifetime != LifetimeLong);
ASSERT(unit_size > 0);
if (offset + alloc_size > g_render.buffer.size) {
gEngine.Con_Printf(S_ERROR "Cannot allocate %u bytes aligned at %u from buffer; only %u are left",
alloc_size, unit_size, g_render.buffer.size - offset);
if (!g_render.num_free_allocs) {
gEngine.Con_Printf(S_ERROR "Cannot allocate buffer, allocs count exhausted\n" );
return InvalidHandle;
}
if (!g_render.num_free_allocs) {
gEngine.Con_Printf(S_ERROR "Cannot allocate buffer, allocs count exhausted\n" );
offset = VK_RingBuffer_Alloc(&g_render.buffer_alloc, alloc_size, unit_size);
if (offset == AllocFailed) {
gEngine.Con_Printf(S_ERROR "Cannot allocate %u bytes aligned at %u from buffer; only %u are left",
alloc_size, unit_size, g_render.buffer_alloc.free);
return InvalidHandle;
}
@ -353,12 +352,6 @@ vk_buffer_handle_t VK_RenderBufferAlloc( uint32_t unit_size, uint32_t count, vk_
alloc->lifetime = lifetime;
alloc->count = count;
g_render.stat.align_holes_size += offset - g_render.buffer_free_offset;
g_render.buffer_free_offset = offset + alloc_size;
if (lifetime < LifetimeSingleFrame)
g_render.buffer_frame_begin_offset = g_render.buffer_free_offset;
return handle;
}
@ -408,7 +401,7 @@ uint32_t VK_RenderBufferGetOffsetInUnits( vk_buffer_handle_t handle )
// Free all LifetimeSingleFrame resources
void VK_RenderBufferClearFrame( void )
{
g_render.buffer_free_offset = g_render.buffer_frame_begin_offset;
VK_RingBuffer_ClearFrame(&g_render.buffer_alloc);
for (int i = 0; i < MAX_ALLOCS; ++i) {
vk_buffer_alloc_t *alloc = g_render.allocs + i;
@ -428,18 +421,21 @@ void VK_RenderBufferClearFrame( void )
// Free all LifetimeMap resources
void VK_RenderBufferClearMap( void )
{
g_render.buffer_free_offset = g_render.buffer_frame_begin_offset = 0;
g_render.stat.align_holes_size = 0;
VK_RingBuffer_Clear(&g_render.buffer_alloc);
g_render.num_static_lights = 0;
resetAllocFreeList();
}
// Marks the end of map loading for the render buffer: everything allocated from
// the render buffer allocator so far becomes permanent and is no longer
// reclaimed by per-frame clears.
void VK_RenderMapLoadEnd( void )
{
VK_RingBuffer_Fix(&g_render.buffer_alloc);
}
void VK_RenderBufferPrintStats( void )
{
gEngine.Con_Reportf("Buffer usage: %uKiB of (%uKiB); holes: %u bytes\n",
g_render.buffer_free_offset / 1024,
g_render.buffer.size / 1024,
g_render.stat.align_holes_size);
gEngine.Con_Reportf("Buffer usage: %uKiB of (%uKiB)\n",
g_render.buffer_alloc.permanent_size / 1024,
g_render.buffer.size / 1024);
}
#define MAX_DRAW_COMMANDS 8192 // TODO estimate
@ -821,7 +817,7 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage
.geometry_data = {
.buffer = g_render.buffer.buffer,
.size = g_render.buffer_free_offset,
.size = VK_WHOLE_SIZE,
},
};

View File

@ -35,6 +35,9 @@ void VK_RenderBufferClearFrame( void );
// Free all LifetimeMap resources
void VK_RenderBufferClearMap( void );
// Mark map as loaded
void VK_RenderMapLoadEnd( void );
// TODO uploading to GPU mem interface
void VK_RenderBufferPrintStats( void );

View File

@ -106,6 +106,7 @@ static struct {
// TODO: unify this with render buffer
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t accels_buffer;
vk_ring_buffer_t accels_buffer_alloc;
// Temp: lives only during a single frame (may have many in flight)
// Used for building ASes;
@ -124,6 +125,7 @@ static struct {
// TODO unify with render buffer
// Needs: STORAGE_BUFFER
vk_buffer_t kusochki_buffer;
vk_ring_buffer_t kusochki_alloc;
// TODO this should really be a single uniform buffer for matrices and light data
@ -149,30 +151,15 @@ static struct {
// Can be potentially crated using compute shader (would need shader write bit)
vk_buffer_t light_grid_buffer;
// TODO make this:
// v -- begin of ring buffer
// |XXXMAPLIFETME|<......|FRAME1|FRAME2|FRAMEN|......................>|
// busy pos - ^ ^ ^ ^ -- write pos
// TODO need several TLASes for N frames in flight
VkAccelerationStructureKHR tlas;
// Data that is alive longer than one frame, usually within one map
struct {
uint32_t buffer_offset;
int num_kusochki;
} map;
// Per-frame data that is accumulated between RayFrameBegin and End calls
struct {
int num_models;
int num_lighttextures;
vk_ray_model_t models[MAX_ACCELS];
uint32_t scratch_offset; // for building dynamic blases
// for dynamic models
uint32_t buffer_offset_at_frame_begin;
int num_kusochki_at_frame_begin;
} frame;
unsigned frame_number;
@ -250,25 +237,22 @@ static qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, cons
}
if (should_create) {
const uint32_t buffer_offset = VK_RingBuffer_Alloc(&g_rtx.accels_buffer_alloc, build_size.accelerationStructureSize, 256);
VkAccelerationStructureCreateInfoKHR asci = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
.buffer = g_rtx.accels_buffer.buffer,
.offset = g_rtx.map.buffer_offset,
.offset = buffer_offset,
.type = args->type,
.size = build_size.accelerationStructureSize,
};
if (MAX_ACCELS_BUFFER - g_rtx.map.buffer_offset < build_size.accelerationStructureSize) {
gEngine.Con_Printf(S_ERROR "Accels buffer overflow: left %u bytes, but need %u\n",
MAX_ACCELS_BUFFER - g_rtx.map.buffer_offset,
if (buffer_offset == AllocFailed) {
gEngine.Con_Printf(S_ERROR "Failed to allocated %u bytes for accel buffer\n",
build_size.accelerationStructureSize);
return false;
}
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->accel));
g_rtx.map.buffer_offset += build_size.accelerationStructureSize;
g_rtx.map.buffer_offset = (g_rtx.map.buffer_offset + 255) & ~255; // Buffer must be aligned to 256 according to spec
}
build_info.dstAccelerationStructure = *args->accel;
@ -282,8 +266,8 @@ static qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, cons
void VK_RayNewMap( void ) {
ASSERT(vk_core.rtx);
g_rtx.map.buffer_offset = 0;
g_rtx.map.num_kusochki = 0;
VK_RingBuffer_Clear(&g_rtx.accels_buffer_alloc);
VK_RingBuffer_Clear(&g_rtx.kusochki_alloc);
// Upload light grid
{
@ -304,6 +288,11 @@ void VK_RayNewMap( void ) {
}
}
// Marks the end of map loading for ray tracing data: everything allocated so
// far from the acceleration structure and kusochki allocators becomes permanent
// and is no longer reclaimed by per-frame clears.
void VK_RayMapLoadEnd( void ) {
VK_RingBuffer_Fix(&g_rtx.accels_buffer_alloc);
VK_RingBuffer_Fix(&g_rtx.kusochki_alloc);
}
void VK_RayFrameBegin( void )
{
ASSERT(vk_core.rtx);
@ -330,8 +319,10 @@ void VK_RayFrameBegin( void )
g_rtx.frame.scratch_offset = 0;
g_rtx.frame.num_models = 0;
g_rtx.frame.num_lighttextures = 0;
g_rtx.frame.buffer_offset_at_frame_begin = g_rtx.map.buffer_offset;
g_rtx.frame.num_kusochki_at_frame_begin = g_rtx.map.num_kusochki;
// TODO N frames in flight
VK_RingBuffer_ClearFrame(&g_rtx.accels_buffer_alloc);
VK_RingBuffer_ClearFrame(&g_rtx.kusochki_alloc);
}
void VK_RayFrameAddModelDynamic( VkCommandBuffer cmdbuf, const vk_ray_model_dynamic_t *dynamic)
@ -699,10 +690,6 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
args->dst.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region,
VK_FILTER_NEAREST);
}
// Restore dynamic buffer offset
g_rtx.map.buffer_offset = g_rtx.frame.buffer_offset_at_frame_begin;
g_rtx.map.num_kusochki = g_rtx.frame.num_kusochki_at_frame_begin;
}
static void createLayouts( void ) {
@ -819,6 +806,7 @@ qboolean VK_RayInit( void )
return false;
}
g_rtx.accels_buffer_addr = getBufferDeviceAddress(g_rtx.accels_buffer.buffer);
g_rtx.accels_buffer_alloc.size = g_rtx.accels_buffer.size;
if (!createBuffer(&g_rtx.scratch_buffer, MAX_SCRATCH_BUFFER,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
@ -842,6 +830,8 @@ qboolean VK_RayInit( void )
// FIXME complain, handle
return false;
}
g_rtx.kusochki_alloc.size = MAX_KUSOCHKI;
if (!createBuffer(&g_rtx.emissive_kusochki_buffer, sizeof(vk_emissive_kusochki_t),
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT /* | VK_BUFFER_USAGE_TRANSFER_DST_BIT */,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
@ -946,11 +936,11 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
const VkDeviceAddress buffer_addr = getBufferDeviceAddress(args.buffer);
vk_kusok_data_t *kusochki;
qboolean result;
const uint32_t kusochki_count_offset = VK_RingBuffer_Alloc(&g_rtx.kusochki_alloc, args.model->num_geometries, 1);
ASSERT(vk_core.rtx);
ASSERT(g_rtx.map.num_kusochki <= MAX_KUSOCHKI);
if (g_rtx.map.num_kusochki + args.model->num_geometries > MAX_KUSOCHKI) {
if (kusochki_count_offset == AllocFailed) {
gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded on model %s\n", args.model->debug_name);
return false;
}
@ -960,8 +950,8 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
geom_build_ranges = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
geom_build_ranges_ptr = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
kusochki = (vk_kusok_data_t*)(g_rtx.kusochki_buffer.mapped) + g_rtx.map.num_kusochki;
args.model->rtx.kusochki_offset = g_rtx.map.num_kusochki;
kusochki = (vk_kusok_data_t*)(g_rtx.kusochki_buffer.mapped) + kusochki_count_offset;
args.model->rtx.kusochki_offset = kusochki_count_offset;
for (int i = 0; i < args.model->num_geometries; ++i) {
const vk_render_geometry_t *mg = args.model->geometries + i;
@ -1056,8 +1046,6 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
if (result) {
int blas_index;
g_rtx.map.num_kusochki += args.model->num_geometries;
for (blas_index = 0; blas_index < ARRAYSIZE(g_rtx.blases); ++blas_index) {
if (g_rtx.blases[blas_index] == VK_NULL_HANDLE) {
g_rtx.blases[blas_index] = args.model->rtx.blas;

View File

@ -59,6 +59,7 @@ typedef struct {
void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args);
void VK_RayNewMap( void );
void VK_RayMapLoadEnd( void );
qboolean VK_RayInit( void );
void VK_RayShutdown( void );

View File

@ -101,15 +101,12 @@ void R_NewMap( void )
// This is to ensure that we have computed lightstyles properly
VK_RunLightStyles();
// TODO this should be per frame
VK_LightsLoadMap();
// TODO should we do something like VK_BrushBeginLoad?
VK_BrushStatsClear();
// FIXME this is totally incorrect btw.
// When loading a save game from the same map this is called, but brush models
// have not been destroyed, which prevents them from being loaded ("again").
// This leads to ASSERTS firing when trying to draw erased buffers.
VK_RenderBufferClearMap();
if (vk_core.rtx)
@ -136,7 +133,9 @@ void R_NewMap( void )
// TODO should we do something like VK_BrushEndLoad?
VK_UploadLightmap();
VK_RenderMapLoadEnd();
if (vk_core.rtx)
VK_RayMapLoadEnd();
VK_RenderBufferPrintStats();
}