vk: improve VkDeviceMemory allocation
Allocate device memory in big 128 MiB chunks. Sub-allocate buffers, images and other resources within those chunks. This uses a dumb grow-only bump allocator for now: space inside a chunk is never reclaimed, so it will not work for more than a couple of maps.
This commit is contained in:
parent
8f7f0d5c8e
commit
bba8b87ada
|
@ -35,9 +35,7 @@ qboolean VK_BufferCreate(const char *debug_name, vk_buffer_t *buf, uint32_t size
|
|||
buf->devmem = VK_DevMemAllocate(memreq, flags, usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT : 0);
|
||||
XVK_CHECK(vkBindBufferMemory(vk_core.device, buf->buffer, buf->devmem.device_memory, buf->devmem.offset));
|
||||
|
||||
// FIXME when there are many allocation per VkDeviceMemory, fix this
|
||||
if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & flags)
|
||||
XVK_CHECK(vkMapMemory(vk_core.device, buf->devmem.device_memory, 0, bci.size, 0, &buf->mapped));
|
||||
buf->mapped = buf->devmem.mapped;
|
||||
|
||||
buf->size = size;
|
||||
|
||||
|
@ -52,9 +50,6 @@ void VK_BufferDestroy(vk_buffer_t *buf) {
|
|||
|
||||
// FIXME when there are many allocation per VkDeviceMemory, fix this
|
||||
if (buf->devmem.device_memory) {
|
||||
if (buf->mapped)
|
||||
vkUnmapMemory(vk_core.device, buf->devmem.device_memory);
|
||||
|
||||
VK_DevMemFree(&buf->devmem);
|
||||
buf->devmem.device_memory = VK_NULL_HANDLE;
|
||||
buf->devmem.offset = 0;
|
||||
|
|
|
@ -14,9 +14,12 @@
|
|||
#include "vk_studio.h"
|
||||
#include "vk_rtx.h"
|
||||
#include "vk_descriptor.h"
|
||||
#include "vk_light.h"
|
||||
#include "vk_nv_aftermath.h"
|
||||
#include "vk_devmem.h"
|
||||
|
||||
// FIXME move this rt-specific stuff out
|
||||
#include "vk_denoiser.h"
|
||||
#include "vk_light.h"
|
||||
|
||||
#include "xash3d_types.h"
|
||||
#include "cvardef.h"
|
||||
|
@ -735,15 +738,19 @@ qboolean R_VkInit( void )
|
|||
if (!createCommandPool())
|
||||
return false;
|
||||
|
||||
if (!VK_DevMemInit())
|
||||
return false;
|
||||
|
||||
if (!VK_BuffersInit())
|
||||
return false;
|
||||
|
||||
// TODO move this to vk_texture module
|
||||
{
|
||||
VkSamplerCreateInfo sci = {
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||
.magFilter = VK_FILTER_LINEAR,
|
||||
.minFilter = VK_FILTER_LINEAR,
|
||||
.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,//CLAMP_TO_EDGE,
|
||||
.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,// TODO CLAMP_TO_EDGE, for menus
|
||||
.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,//CLAMP_TO_EDGE,
|
||||
.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
.anisotropyEnable = vk_core.physical_device.anisotropy_enabled,
|
||||
|
@ -789,6 +796,7 @@ qboolean R_VkInit( void )
|
|||
if (!VK_RayInit())
|
||||
return false;
|
||||
|
||||
// FIXME move all this to rt-specific modules
|
||||
VK_LightsInit();
|
||||
|
||||
if (!XVK_DenoiserInit())
|
||||
|
@ -824,6 +832,8 @@ void R_VkShutdown( void )
|
|||
vkDestroySampler(vk_core.device, vk_core.default_sampler, NULL);
|
||||
VK_BuffersDestroy();
|
||||
|
||||
VK_DevMemDestroy();
|
||||
|
||||
vkDestroyCommandPool(vk_core.device, vk_core.command_pool, NULL);
|
||||
|
||||
vkDestroyDevice(vk_core.device, NULL);
|
||||
|
|
|
@ -1,7 +1,28 @@
|
|||
#include "vk_devmem.h"
|
||||
|
||||
static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFlags flags) {
|
||||
for (uint32_t i = 0; i < vk_core.physical_device.memory_properties2.memoryProperties.memoryTypeCount; ++i) {
|
||||
#define MAX_DEVMEM_ALLOCS 8
|
||||
|
||||
typedef struct {
|
||||
uint32_t type_bit;
|
||||
VkMemoryPropertyFlags property_flags; // device vs host
|
||||
VkMemoryAllocateFlags allocate_flags;
|
||||
VkDeviceMemory device_memory;
|
||||
VkDeviceSize size;
|
||||
|
||||
void *map;
|
||||
int refcount;
|
||||
|
||||
// TODO a better allocator
|
||||
VkDeviceSize free_offset;
|
||||
} vk_device_memory_t;
|
||||
|
||||
static struct {
|
||||
vk_device_memory_t allocs[MAX_DEVMEM_ALLOCS];
|
||||
int num_allocs;
|
||||
} g_vk_devmem;
|
||||
|
||||
static int findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFlags flags) {
|
||||
for (int i = 0; i < (int)vk_core.physical_device.memory_properties2.memoryProperties.memoryTypeCount; ++i) {
|
||||
if (!(type_index_bits & (1 << i)))
|
||||
continue;
|
||||
|
||||
|
@ -12,37 +33,146 @@ static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFla
|
|||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
vk_devmem_t VK_DevMemAllocate(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags) {
|
||||
// TODO coalesce allocations, ...
|
||||
#define DEFAULT_ALLOCATION_SIZE (128 * 1024 * 1024)
|
||||
|
||||
// Picks the size of the VkDeviceMemory chunk to request for a resource needing `size` bytes:
// never smaller than DEFAULT_ALLOCATION_SIZE, and exactly `size` for anything larger.
static VkDeviceSize optimalSize(VkDeviceSize size) {
	// TODO:
	// 1. have a way to iterate for smaller sizes if allocation failed
	// 2. bump to nearest power-of-two-ish based size (e.g. a multiple of 32Mb or something)
	return (size < DEFAULT_ALLOCATION_SIZE) ? DEFAULT_ALLOCATION_SIZE : size;
}
|
||||
|
||||
// Allocates a new VkDeviceMemory chunk big enough for `req` and appends it to g_vk_devmem.allocs.
//
// req            - memory requirements of the resource that triggered the allocation;
//                  its size is rounded up by optimalSize() and its memoryTypeBits limit the memory type.
// prop_flags     - memory property flags the chosen memory type must have.
// allocate_flags - VkMemoryAllocateFlags to allocate the chunk with (chained via pNext when non-zero).
//
// Host-visible chunks are mapped once, in full, for their whole lifetime.
//
// Returns the index of the new chunk in g_vk_devmem.allocs, or -1 when the chunk table is full
// or no memory type satisfies the request.
static int allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags prop_flags, VkMemoryAllocateFlags allocate_flags) {
	if (g_vk_devmem.num_allocs == MAX_DEVMEM_ALLOCS)
		return -1;

	const VkMemoryAllocateFlagsInfo mafi = {
		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
		.flags = allocate_flags,
	};

	const VkMemoryAllocateInfo mai = {
		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		.pNext = allocate_flags ? &mafi : NULL,
		.allocationSize = optimalSize(req.size),
		.memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, prop_flags),
	};

	// VkDeviceSize is 64-bit and memoryTypeIndex is unsigned: cast explicitly so that the
	// arguments match the %zu / %d conversions on every target (including 32-bit ones).
	gEngine.Con_Reportf("allocateDeviceMemory size=%zu memoryTypeBits=0x%x memoryProperties=%c%c%c%c%c allocate_flags=0x%x prop_flags=0x%x => typeIndex=%d\n",
		(size_t)mai.allocationSize, req.memoryTypeBits,
		prop_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ? 'D' : '.',
		prop_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ? 'V' : '.',
		prop_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 'C' : '.',
		prop_flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT ? '$' : '.',
		prop_flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT ? 'L' : '.',
		allocate_flags,
		prop_flags,
		(int)mai.memoryTypeIndex);

	// No suitable memory type: complain loudly in debug builds, but never index memoryTypes[]
	// or shift by an out-of-range value when assertions are compiled out.
	ASSERT(mai.memoryTypeIndex != UINT32_MAX);
	if (mai.memoryTypeIndex >= vk_core.physical_device.memory_properties2.memoryProperties.memoryTypeCount)
		return -1;

	vk_device_memory_t *const device_memory = g_vk_devmem.allocs + g_vk_devmem.num_allocs;
	XVK_CHECK(vkAllocateMemory(vk_core.device, &mai, NULL, &device_memory->device_memory));

	device_memory->property_flags = vk_core.physical_device.memory_properties2.memoryProperties.memoryTypes[mai.memoryTypeIndex].propertyFlags;
	device_memory->allocate_flags = allocate_flags;
	// Unsigned shift: memory type index 31 is valid and would overflow a signed int.
	device_memory->type_bit = (1u << mai.memoryTypeIndex);
	device_memory->free_offset = 0;
	device_memory->refcount = 0;
	device_memory->size = mai.allocationSize;

	if (device_memory->property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
		XVK_CHECK(vkMapMemory(vk_core.device, device_memory->device_memory, 0, device_memory->size, 0, &device_memory->map));
	} else {
		device_memory->map = NULL;
	}

	return g_vk_devmem.num_allocs++;
}
|
||||
|
||||
vk_devmem_t VK_DevMemAllocate(VkMemoryRequirements req, VkMemoryPropertyFlags prop_flags, VkMemoryAllocateFlags allocate_flags) {
|
||||
vk_devmem_t ret = {0};
|
||||
int device_memory_index = -1;
|
||||
|
||||
const VkMemoryAllocateFlagsInfo mafi = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
|
||||
.flags = flags,
|
||||
};
|
||||
if (vk_core.rtx) {
|
||||
// TODO this is needed only for the ray tracer and only while there's no proper staging
|
||||
// Once staging is established, we can avoid forcing this on every devmem allocation
|
||||
allocate_flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
|
||||
}
|
||||
|
||||
const VkMemoryAllocateInfo mai = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = flags ? &mafi : NULL,
|
||||
.allocationSize = req.size,
|
||||
.memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, props),
|
||||
};
|
||||
for (int i = 0; i < g_vk_devmem.num_allocs; ++i) {
|
||||
vk_device_memory_t *const device_memory = g_vk_devmem.allocs + i;
|
||||
if ((device_memory->type_bit & req.memoryTypeBits) == 0)
|
||||
continue;
|
||||
|
||||
gEngine.Con_Reportf("allocateDeviceMemory size=%zu memoryTypeBits=0x%x memoryProperties=%c%c%c%c%c flags=0x%x => typeIndex=%d\n", req.size, req.memoryTypeBits,
|
||||
props & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ? 'D' : '.',
|
||||
props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ? 'V' : '.',
|
||||
props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 'C' : '.',
|
||||
props & VK_MEMORY_PROPERTY_HOST_CACHED_BIT ? '$' : '.',
|
||||
props & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT ? 'L' : '.',
|
||||
flags,
|
||||
mai.memoryTypeIndex);
|
||||
if ((device_memory->allocate_flags & allocate_flags) != allocate_flags)
|
||||
continue;
|
||||
|
||||
ASSERT(mai.memoryTypeIndex != UINT32_MAX);
|
||||
XVK_CHECK(vkAllocateMemory(vk_core.device, &mai, NULL, &ret.device_memory));
|
||||
return ret;
|
||||
if ((device_memory->property_flags & prop_flags) != prop_flags)
|
||||
continue;
|
||||
|
||||
const VkDeviceSize aligned_offset = ALIGN_UP(device_memory->free_offset, req.alignment);
|
||||
if (aligned_offset + req.size > device_memory->size)
|
||||
continue;
|
||||
|
||||
device_memory->free_offset = aligned_offset;
|
||||
device_memory_index = i;
|
||||
break;
|
||||
}
|
||||
|
||||
if (device_memory_index < 0) {
|
||||
device_memory_index = allocateDeviceMemory(req, prop_flags, allocate_flags);
|
||||
}
|
||||
|
||||
ASSERT(device_memory_index >= 0);
|
||||
if (device_memory_index < 0)
|
||||
return ret;
|
||||
|
||||
{
|
||||
vk_device_memory_t *const device_memory = g_vk_devmem.allocs + device_memory_index;
|
||||
ret.device_memory = device_memory->device_memory;
|
||||
ret.mapped = device_memory->map ? device_memory->map + device_memory->free_offset : NULL;
|
||||
ret.offset = device_memory->free_offset;
|
||||
|
||||
device_memory->free_offset += req.size;
|
||||
device_memory->refcount++;
|
||||
ret.priv_.index = device_memory_index;
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
void VK_DevMemFree(vk_devmem_t *mem) {
|
||||
vkFreeMemory(vk_core.device, mem->device_memory, NULL);
|
||||
mem->device_memory = VK_NULL_HANDLE;
|
||||
void VK_DevMemFree(const vk_devmem_t *mem) {
|
||||
ASSERT(mem->priv_.index >= 0);
|
||||
ASSERT(mem->priv_.index < g_vk_devmem.num_allocs);
|
||||
|
||||
vk_device_memory_t *const device_memory = g_vk_devmem.allocs + mem->priv_.index;
|
||||
ASSERT(mem->device_memory == device_memory->device_memory);
|
||||
|
||||
// FIXME deallocate properly
|
||||
|
||||
device_memory->refcount--;
|
||||
}
|
||||
|
||||
qboolean VK_DevMemInit( void ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void VK_DevMemDestroy( void ) {
|
||||
for (int i = 0; i < g_vk_devmem.num_allocs; ++i) {
|
||||
const vk_device_memory_t *const device_memory = g_vk_devmem.allocs + i;
|
||||
ASSERT(device_memory->refcount == 0);
|
||||
|
||||
if (device_memory->map)
|
||||
vkUnmapMemory(vk_core.device, device_memory->device_memory);
|
||||
|
||||
vkFreeMemory(vk_core.device, device_memory->device_memory, NULL);
|
||||
}
|
||||
|
||||
g_vk_devmem.num_allocs = 0;
|
||||
}
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
#pragma once
|
||||
#include "vk_core.h"
|
||||
|
||||
// FIXME arena allocation, ...
|
||||
qboolean VK_DevMemInit( void );
|
||||
void VK_DevMemDestroy( void );
|
||||
|
||||
typedef struct vk_devmem_s {
|
||||
VkDeviceMemory device_memory;
|
||||
uint32_t offset;
|
||||
void *mapped;
|
||||
|
||||
struct { int index; } priv_;
|
||||
} vk_devmem_t;
|
||||
|
||||
vk_devmem_t VK_DevMemAllocate(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags);
|
||||
void VK_DevMemFree(vk_devmem_t *mem);
|
||||
void VK_DevMemFree(const vk_devmem_t *mem);
|
||||
|
|
|
@ -113,7 +113,7 @@ static void createDepthImage(int w, int h) {
|
|||
|
||||
vkGetImageMemoryRequirements(vk_core.device, g_frame.depth.image, &memreq);
|
||||
g_frame.depth.device_memory = VK_DevMemAllocate(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
|
||||
XVK_CHECK(vkBindImageMemory(vk_core.device, g_frame.depth.image, g_frame.depth.device_memory.device_memory, 0));
|
||||
XVK_CHECK(vkBindImageMemory(vk_core.device, g_frame.depth.image, g_frame.depth.device_memory.device_memory, g_frame.depth.device_memory.offset));
|
||||
|
||||
{
|
||||
VkImageViewCreateInfo ivci = {.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
|
||||
|
@ -744,11 +744,9 @@ static rgbdata_t *XVK_ReadPixels( void ) {
|
|||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
};
|
||||
VkSubresourceLayout layout;
|
||||
const char *mapped;
|
||||
const char *mapped = dest_devmem.mapped;
|
||||
vkGetImageSubresourceLayout(vk_core.device, dest_image, &subres, &layout);
|
||||
|
||||
vkMapMemory(vk_core.device, dest_devmem.device_memory, 0, VK_WHOLE_SIZE, 0, (void**)&mapped);
|
||||
|
||||
mapped += layout.offset;
|
||||
|
||||
{
|
||||
|
@ -788,8 +786,6 @@ static rgbdata_t *XVK_ReadPixels( void ) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
vkUnmapMemory(vk_core.device, dest_devmem.device_memory);
|
||||
}
|
||||
|
||||
vkDestroyImage(vk_core.device, dest_image, NULL);
|
||||
|
|
|
@ -29,7 +29,7 @@ xvk_image_t XVK_ImageCreate(const xvk_image_create_t *create) {
|
|||
|
||||
vkGetImageMemoryRequirements(vk_core.device, image.image, &memreq);
|
||||
image.devmem = VK_DevMemAllocate(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
|
||||
XVK_CHECK(vkBindImageMemory(vk_core.device, image.image, image.devmem.device_memory, 0));
|
||||
XVK_CHECK(vkBindImageMemory(vk_core.device, image.image, image.devmem.device_memory, image.devmem.offset));
|
||||
|
||||
ivci.viewType = create->is_cubemap ? VK_IMAGE_VIEW_TYPE_CUBE : VK_IMAGE_VIEW_TYPE_2D;
|
||||
ivci.format = ici.format;
|
||||
|
|
Loading…
Reference in New Issue