vk: improve VkDeviceMemory allocation

Allocate device memory in big 128 MiB chunks. Sub-allocate buffers, images
and other resources within those chunks.

This uses a dumb grow-only bump allocator for now: freed ranges are never
reused, so it will not work for more than a couple of maps.
This commit is contained in:
Ivan Avdeev 2022-01-13 00:25:06 -08:00
parent 8f7f0d5c8e
commit bba8b87ada
6 changed files with 181 additions and 45 deletions

View File

@ -35,9 +35,7 @@ qboolean VK_BufferCreate(const char *debug_name, vk_buffer_t *buf, uint32_t size
buf->devmem = VK_DevMemAllocate(memreq, flags, usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT : 0);
XVK_CHECK(vkBindBufferMemory(vk_core.device, buf->buffer, buf->devmem.device_memory, buf->devmem.offset));
// FIXME when there are many allocation per VkDeviceMemory, fix this
if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & flags)
XVK_CHECK(vkMapMemory(vk_core.device, buf->devmem.device_memory, 0, bci.size, 0, &buf->mapped));
buf->mapped = buf->devmem.mapped;
buf->size = size;
@ -52,9 +50,6 @@ void VK_BufferDestroy(vk_buffer_t *buf) {
// FIXME when there are many allocation per VkDeviceMemory, fix this
if (buf->devmem.device_memory) {
if (buf->mapped)
vkUnmapMemory(vk_core.device, buf->devmem.device_memory);
VK_DevMemFree(&buf->devmem);
buf->devmem.device_memory = VK_NULL_HANDLE;
buf->devmem.offset = 0;

View File

@ -14,9 +14,12 @@
#include "vk_studio.h"
#include "vk_rtx.h"
#include "vk_descriptor.h"
#include "vk_light.h"
#include "vk_nv_aftermath.h"
#include "vk_devmem.h"
// FIXME move this rt-specific stuff out
#include "vk_denoiser.h"
#include "vk_light.h"
#include "xash3d_types.h"
#include "cvardef.h"
@ -735,15 +738,19 @@ qboolean R_VkInit( void )
if (!createCommandPool())
return false;
if (!VK_DevMemInit())
return false;
if (!VK_BuffersInit())
return false;
// TODO move this to vk_texture module
{
VkSamplerCreateInfo sci = {
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.magFilter = VK_FILTER_LINEAR,
.minFilter = VK_FILTER_LINEAR,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,//CLAMP_TO_EDGE,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,// TODO CLAMP_TO_EDGE, for menus
.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,//CLAMP_TO_EDGE,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
.anisotropyEnable = vk_core.physical_device.anisotropy_enabled,
@ -789,6 +796,7 @@ qboolean R_VkInit( void )
if (!VK_RayInit())
return false;
// FIXME move all this to rt-specific modules
VK_LightsInit();
if (!XVK_DenoiserInit())
@ -824,6 +832,8 @@ void R_VkShutdown( void )
vkDestroySampler(vk_core.device, vk_core.default_sampler, NULL);
VK_BuffersDestroy();
VK_DevMemDestroy();
vkDestroyCommandPool(vk_core.device, vk_core.command_pool, NULL);
vkDestroyDevice(vk_core.device, NULL);

View File

@ -1,7 +1,28 @@
#include "vk_devmem.h"
static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFlags flags) {
for (uint32_t i = 0; i < vk_core.physical_device.memory_properties2.memoryProperties.memoryTypeCount; ++i) {
#define MAX_DEVMEM_ALLOCS 8
// One large VkDeviceMemory chunk from which individual resources are sub-allocated.
typedef struct {
// Single-bit mask (1 << memory type index) of the memory type this chunk was allocated from;
// tested against VkMemoryRequirements.memoryTypeBits when looking for a reusable chunk.
uint32_t type_bit;
// Property flags of the chosen memory type (device vs host), as reported by the physical device.
VkMemoryPropertyFlags property_flags; // device vs host
// Allocate flags the chunk was created with.
VkMemoryAllocateFlags allocate_flags;
VkDeviceMemory device_memory;
// Total size of the chunk.
VkDeviceSize size;
// Host mapping of the whole chunk; NULL when the memory type is not host-visible.
void *map;
// Number of sub-allocations handed out and not yet released with VK_DevMemFree().
int refcount;
// TODO a better allocator
// Bump pointer: offset of the first byte not handed out yet. It only ever grows.
VkDeviceSize free_offset;
} vk_device_memory_t;
// Module state: fixed-size table of every device memory chunk allocated so far.
static struct {
// Only the first num_allocs entries are in use.
vk_device_memory_t allocs[MAX_DEVMEM_ALLOCS];
// Number of chunks currently allocated; reset to 0 by VK_DevMemDestroy().
int num_allocs;
} g_vk_devmem;
static int findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFlags flags) {
for (int i = 0; i < (int)vk_core.physical_device.memory_properties2.memoryProperties.memoryTypeCount; ++i) {
if (!(type_index_bits & (1 << i)))
continue;
@ -12,37 +33,146 @@ static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFla
return UINT32_MAX;
}
vk_devmem_t VK_DevMemAllocate(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags) {
// TODO coalesce allocations, ...
#define DEFAULT_ALLOCATION_SIZE (128 * 1024 * 1024)
// Picks the size of a new device memory chunk for a request of `size`:
// requests smaller than DEFAULT_ALLOCATION_SIZE get a chunk of that default size,
// anything else gets a chunk of exactly the requested size.
static VkDeviceSize optimalSize(VkDeviceSize size) {
	// TODO:
	// 1. have a way to iterate for smaller sizes if allocation failed
	// 2. bump to nearest power-of-two-ish based size (e.g. a multiple of 32Mb or something)
	return (size < DEFAULT_ALLOCATION_SIZE) ? DEFAULT_ALLOCATION_SIZE : size;
}
// Allocates a new VkDeviceMemory chunk able to hold `req` and registers it in g_vk_devmem.
//
// The chunk size is optimalSize(req.size) and the memory type is picked by
// findMemoryWithType() from req.memoryTypeBits and prop_flags. If the chosen memory
// type is host-visible, the whole chunk is mapped right away and the pointer is kept
// in the chunk's `map` field.
//
// req            - memory requirements of the resource that triggered the allocation
// prop_flags     - memory property flags the chunk must have
// allocate_flags - VkMemoryAllocateFlags to allocate the chunk with (0 for none)
//
// Returns the index of the new chunk in g_vk_devmem.allocs, or -1 when the chunk table
// is already full or no suitable memory type was found.
static int allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags prop_flags, VkMemoryAllocateFlags allocate_flags) {
	if (g_vk_devmem.num_allocs == MAX_DEVMEM_ALLOCS)
		return -1;

	{
		const VkMemoryAllocateFlagsInfo mafi = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
			.flags = allocate_flags,
		};

		const VkMemoryAllocateInfo mai = {
			.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
			.pNext = allocate_flags ? &mafi : NULL,
			.allocationSize = optimalSize(req.size),
			.memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, prop_flags),
		};

		// The arguments are cast to the exact types the conversion specifiers expect:
		// VkDeviceSize is not necessarily size_t, and memoryTypeIndex is unsigned.
		gEngine.Con_Reportf("allocateDeviceMemory size=%zu memoryTypeBits=0x%x memoryProperties=%c%c%c%c%c allocate_flags=0x%x prop_flags=0x%x => typeIndex=%d\n",
			(size_t)mai.allocationSize, req.memoryTypeBits,
			prop_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ? 'D' : '.',
			prop_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ? 'V' : '.',
			prop_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 'C' : '.',
			prop_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT ? '$' : '.',
			prop_flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT ? 'L' : '.',
			allocate_flags,
			prop_flags,
			(int)mai.memoryTypeIndex);

		// The index is used below to subscript memoryTypes[], so never continue with an
		// invalid one, even in builds where ASSERT does not stop execution.
		ASSERT(mai.memoryTypeIndex != UINT32_MAX);
		if (mai.memoryTypeIndex == UINT32_MAX)
			return -1;

		vk_device_memory_t *device_memory = g_vk_devmem.allocs + g_vk_devmem.num_allocs;
		XVK_CHECK(vkAllocateMemory(vk_core.device, &mai, NULL, &device_memory->device_memory));

		device_memory->property_flags = vk_core.physical_device.memory_properties2.memoryProperties.memoryTypes[mai.memoryTypeIndex].propertyFlags;
		device_memory->allocate_flags = allocate_flags;
		// Unsigned shift: shifting a signed 1 into bit 31 would be undefined behavior.
		device_memory->type_bit = (1u << mai.memoryTypeIndex);
		device_memory->free_offset = 0;
		device_memory->refcount = 0;
		device_memory->size = mai.allocationSize;

		if (device_memory->property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
			XVK_CHECK(vkMapMemory(vk_core.device, device_memory->device_memory, 0, device_memory->size, 0, &device_memory->map));
		} else {
			device_memory->map = NULL;
		}
	}

	return g_vk_devmem.num_allocs++;
}
// Sub-allocates memory satisfying `req` from the chunks tracked in g_vk_devmem.
//
// An existing chunk is reused when its memory type is allowed by req.memoryTypeBits,
// it carries at least the requested allocate_flags and prop_flags, and the aligned
// request still fits behind its free_offset. Otherwise allocateDeviceMemory() is asked
// for a new chunk.
//
// Returns the chunk's VkDeviceMemory, the offset of the sub-allocation inside it, the
// mapped pointer for that offset (NULL if the chunk is not mapped) and the chunk index
// in priv_.index. If no chunk could be obtained, ASSERT fires and the zero-initialized
// struct is returned.
vk_devmem_t VK_DevMemAllocate(VkMemoryRequirements req, VkMemoryPropertyFlags prop_flags, VkMemoryAllocateFlags allocate_flags) {
vk_devmem_t ret = {0};
int device_memory_index = -1;
// NOTE(review): `flags` is not a parameter of this signature; this initializer looks like
// part of the previous implementation left interleaved in this listing -- confirm.
const VkMemoryAllocateFlagsInfo mafi = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
.flags = flags,
};
if (vk_core.rtx) {
// TODO this is needed only for the ray tracer and only while there's no proper staging
// Once staging is established, we can avoid forcing this on every devmem allocation
allocate_flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
}
// NOTE(review): uses `flags`/`props`, which this signature does not declare; presumably
// previous-implementation lines as well -- confirm.
const VkMemoryAllocateInfo mai = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = flags ? &mafi : NULL,
.allocationSize = req.size,
.memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, props),
};
// Look for an already allocated chunk that can serve this request.
for (int i = 0; i < g_vk_devmem.num_allocs; ++i) {
vk_device_memory_t *const device_memory = g_vk_devmem.allocs + i;
// The chunk's memory type must be one of the types the resource accepts.
if ((device_memory->type_bit & req.memoryTypeBits) == 0)
continue;
// NOTE(review): this log call references `props`/`flags`/`mai` from the lines flagged
// above; presumably previous-implementation code -- confirm.
gEngine.Con_Reportf("allocateDeviceMemory size=%zu memoryTypeBits=0x%x memoryProperties=%c%c%c%c%c flags=0x%x => typeIndex=%d\n", req.size, req.memoryTypeBits,
props & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ? 'D' : '.',
props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ? 'V' : '.',
props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 'C' : '.',
props & VK_MEMORY_PROPERTY_HOST_CACHED_BIT ? '$' : '.',
props & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT ? 'L' : '.',
flags,
mai.memoryTypeIndex);
// The chunk must have been allocated with every requested allocate flag.
if ((device_memory->allocate_flags & allocate_flags) != allocate_flags)
continue;
// NOTE(review): a direct vkAllocateMemory followed by an unconditional `return ret` here
// would make the rest of the loop unreachable; presumably previous-implementation
// lines -- confirm.
ASSERT(mai.memoryTypeIndex != UINT32_MAX);
XVK_CHECK(vkAllocateMemory(vk_core.device, &mai, NULL, &ret.device_memory));
return ret;
// The chunk's memory type must provide every requested property flag.
if ((device_memory->property_flags & prop_flags) != prop_flags)
continue;
// Bump allocation: take the next free offset adjusted to the required alignment
// (ALIGN_UP is defined elsewhere; presumably rounds up to a multiple of req.alignment).
const VkDeviceSize aligned_offset = ALIGN_UP(device_memory->free_offset, req.alignment);
// Skip chunks that do not have enough room left.
if (aligned_offset + req.size > device_memory->size)
continue;
// Commit the alignment padding now; the request itself is carved out further below.
device_memory->free_offset = aligned_offset;
device_memory_index = i;
break;
}
// No existing chunk fits: allocate a new one. Its free_offset starts at 0.
if (device_memory_index < 0) {
device_memory_index = allocateDeviceMemory(req, prop_flags, allocate_flags);
}
ASSERT(device_memory_index >= 0);
if (device_memory_index < 0)
return ret;
{
vk_device_memory_t *const device_memory = g_vk_devmem.allocs + device_memory_index;
ret.device_memory = device_memory->device_memory;
// NOTE(review): arithmetic on void* is a compiler extension, not standard C.
ret.mapped = device_memory->map ? device_memory->map + device_memory->free_offset : NULL;
ret.offset = device_memory->free_offset;
// Advance the bump pointer past this sub-allocation and count it as live.
device_memory->free_offset += req.size;
device_memory->refcount++;
ret.priv_.index = device_memory_index;
return ret;
}
}
void VK_DevMemFree(vk_devmem_t *mem) {
vkFreeMemory(vk_core.device, mem->device_memory, NULL);
mem->device_memory = VK_NULL_HANDLE;
// Releases a sub-allocation previously returned by VK_DevMemAllocate().
//
// Only the owning chunk's reference count is dropped: the range is not handed back to
// the chunk, and the chunk itself stays allocated until VK_DevMemDestroy().
void VK_DevMemFree(const vk_devmem_t *mem) {
	vk_device_memory_t *device_memory;

	// The handle must point at a chunk that is currently registered...
	ASSERT(mem->priv_.index >= 0);
	ASSERT(mem->priv_.index < g_vk_devmem.num_allocs);

	// ...and that chunk must be the one the handle was carved out of.
	device_memory = &g_vk_devmem.allocs[mem->priv_.index];
	ASSERT(mem->device_memory == device_memory->device_memory);

	// FIXME deallocate properly
	device_memory->refcount -= 1;
}
// Initializes the device memory module. There is currently nothing to set up,
// so this always returns true.
qboolean VK_DevMemInit( void ) {
return true;
}
// Frees every device memory chunk registered in g_vk_devmem and empties the table.
//
// Each chunk is expected to have no outstanding sub-allocations (refcount == 0).
// Chunks that were mapped are unmapped before being freed.
void VK_DevMemDestroy( void ) {
	const vk_device_memory_t *device_memory = g_vk_devmem.allocs;

	for (int remaining = g_vk_devmem.num_allocs; remaining > 0; --remaining, ++device_memory) {
		ASSERT(device_memory->refcount == 0);

		if (device_memory->map != NULL)
			vkUnmapMemory(vk_core.device, device_memory->device_memory);

		vkFreeMemory(vk_core.device, device_memory->device_memory, NULL);
	}

	g_vk_devmem.num_allocs = 0;
}

View File

@ -1,11 +1,16 @@
#pragma once
#include "vk_core.h"
// FIXME arena allocation, ...
qboolean VK_DevMemInit( void );
void VK_DevMemDestroy( void );
// Handle to a range of device memory returned by VK_DevMemAllocate().
typedef struct vk_devmem_s {
// Memory object the range lives in; several handles may refer to the same object.
VkDeviceMemory device_memory;
// Offset of the range within device_memory; pass it when binding buffers/images.
// NOTE(review): 32-bit here, while the allocator tracks offsets as VkDeviceSize -- confirm
// that chunk sizes can never exceed what fits.
uint32_t offset;
// Host pointer to the start of the range, or NULL when the memory is not mapped.
void *mapped;
// Allocator bookkeeping (index of the owning chunk); not meant for callers to modify.
struct { int index; } priv_;
} vk_devmem_t;
vk_devmem_t VK_DevMemAllocate(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags);
void VK_DevMemFree(vk_devmem_t *mem);
void VK_DevMemFree(const vk_devmem_t *mem);

View File

@ -113,7 +113,7 @@ static void createDepthImage(int w, int h) {
vkGetImageMemoryRequirements(vk_core.device, g_frame.depth.image, &memreq);
g_frame.depth.device_memory = VK_DevMemAllocate(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
XVK_CHECK(vkBindImageMemory(vk_core.device, g_frame.depth.image, g_frame.depth.device_memory.device_memory, 0));
XVK_CHECK(vkBindImageMemory(vk_core.device, g_frame.depth.image, g_frame.depth.device_memory.device_memory, g_frame.depth.device_memory.offset));
{
VkImageViewCreateInfo ivci = {.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
@ -744,11 +744,9 @@ static rgbdata_t *XVK_ReadPixels( void ) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
};
VkSubresourceLayout layout;
const char *mapped;
const char *mapped = dest_devmem.mapped;
vkGetImageSubresourceLayout(vk_core.device, dest_image, &subres, &layout);
vkMapMemory(vk_core.device, dest_devmem.device_memory, 0, VK_WHOLE_SIZE, 0, (void**)&mapped);
mapped += layout.offset;
{
@ -788,8 +786,6 @@ static rgbdata_t *XVK_ReadPixels( void ) {
}
}
}
vkUnmapMemory(vk_core.device, dest_devmem.device_memory);
}
vkDestroyImage(vk_core.device, dest_image, NULL);

View File

@ -29,7 +29,7 @@ xvk_image_t XVK_ImageCreate(const xvk_image_create_t *create) {
vkGetImageMemoryRequirements(vk_core.device, image.image, &memreq);
image.devmem = VK_DevMemAllocate(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
XVK_CHECK(vkBindImageMemory(vk_core.device, image.image, image.devmem.device_memory, 0));
XVK_CHECK(vkBindImageMemory(vk_core.device, image.image, image.devmem.device_memory, image.devmem.offset));
ivci.viewType = create->is_cubemap ? VK_IMAGE_VIEW_TYPE_CUBE : VK_IMAGE_VIEW_TYPE_2D;
ivci.format = ici.format;