rtx: fix blas creation validation/memory errors

This commit is contained in:
Ivan 'provod' Avdeev 2021-02-27 15:40:57 -08:00
parent 68bce887b4
commit c77f39e353
8 changed files with 90 additions and 65 deletions

View File

@ -16,11 +16,12 @@ qboolean createBuffer(vk_buffer_t *buf, uint32_t size, VkBufferUsageFlags usage,
vkGetBufferMemoryRequirements(vk_core.device, buf->buffer, &memreq);
gEngine.Con_Reportf("memreq: memoryTypeBits=0x%x alignment=%zu size=%zu\n", memreq.memoryTypeBits, memreq.alignment, memreq.size);
buf->device_memory = allocateDeviceMemory(memreq, flags);
buf->device_memory = allocateDeviceMemory(memreq, flags, usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT : 0);
XVK_CHECK(vkBindBufferMemory(vk_core.device, buf->buffer, buf->device_memory.device_memory, buf->device_memory.offset));
// FIXME when there are many allocations per VkDeviceMemory, fix this (the mapping below always starts at offset 0)
XVK_CHECK(vkMapMemory(vk_core.device, buf->device_memory.device_memory, 0, bci.size, 0, &buf->mapped));
if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & flags)
XVK_CHECK(vkMapMemory(vk_core.device, buf->device_memory.device_memory, 0, bci.size, 0, &buf->mapped));
buf->size = size;
@ -37,7 +38,9 @@ void destroyBuffer(vk_buffer_t *buf) {
if (buf->device_memory.device_memory)
{
vkUnmapMemory(vk_core.device, buf->device_memory.device_memory);
if (buf->mapped)
vkUnmapMemory(vk_core.device, buf->device_memory.device_memory);
freeDeviceMemory(&buf->device_memory);
buf->device_memory.device_memory = VK_NULL_HANDLE;
buf->device_memory.offset = 0;

View File

@ -41,6 +41,7 @@ static PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
INSTANCE_FUNCS(X)
INSTANCE_DEBUG_FUNCS(X)
DEVICE_FUNCS(X)
DEVICE_FUNCS_RTX(X)
#undef X
static dllfunc_t nullinst_funcs[] = {
@ -802,11 +803,18 @@ static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFla
return UINT32_MAX;
}
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props) {
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags) {
// TODO coalesce allocations, ...
device_memory_t ret = {0};
const VkMemoryAllocateFlagsInfo mafi = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
.flags = flags,
};
VkMemoryAllocateInfo mai = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = flags ? &mafi : NULL,
.allocationSize = req.size,
.memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, props),
};

View File

@ -24,7 +24,7 @@ typedef struct device_memory_s
uint32_t offset;
} device_memory_t;
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props);
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags);
void freeDeviceMemory(device_memory_t *mem);
typedef struct vk_buffer_s

View File

@ -77,7 +77,7 @@ static void createDepthImage(int w, int h) {
vk_frame.depth.image = createImage(w, h, vk_frame.depth.format, tiling, usage);
vkGetImageMemoryRequirements(vk_core.device, vk_frame.depth.image, &memreq);
vk_frame.depth.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
vk_frame.depth.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
XVK_CHECK(vkBindImageMemory(vk_core.device, vk_frame.depth.image, vk_frame.depth.device_memory.device_memory, 0));
{

View File

@ -249,7 +249,7 @@ qboolean VK_RenderInit( void )
// TODO device memory and friends (e.g. handle mobile memory ...)
if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (vk_core.rtx ? VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : 0), VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
return false;
if (!createBuffer(&g_render.uniform_buffer, g_render.uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
@ -411,8 +411,6 @@ typedef struct {
render_draw_t draw;
uint32_t ubo_offset;
//char debug_name[MAX_DEBUG_NAME_LENGTH];
vk_ray_model_handle_t ray_model;
} draw_command_t;
static struct {
@ -526,24 +524,6 @@ void VK_RenderScheduleDraw( const render_draw_t *draw )
draw_command = g_render_state.draw_commands + (g_render_state.num_draw_commands++);
draw_command->draw = *draw;
draw_command->ubo_offset = g_render.uniform_unit_size * ubo_index;
draw_command->ray_model = InvalidRayModel;
if (vk_core.rtx)
// TODO there's a more complex story with lifetimes and rebuilds && vertex_buffer->lifetime < LifetimeSingleFrame)
{
// TODO it would make sense to join logical models into a single ray model
// but here we've completely lost this info, as models are now just a stream
// of independent draws
const vk_ray_model_create_t ray_model_args = {
.element_count = draw->element_count,
.vertex_count = vertex_buffer->count,
.index_offset = index_buffer ? index_buffer->unit_size * (draw->index_offset + index_buffer->buffer_offset_in_units) : UINT32_MAX,
.vertex_offset = (draw->vertex_offset + vertex_buffer->buffer_offset_in_units) * vertex_buffer->unit_size,
.buffer = g_render.buffer.buffer,
};
draw_command->ray_model = VK_RayModelCreate( &ray_model_args );
}
}
void VK_RenderEnd( VkCommandBuffer cmdbuf )
@ -626,9 +606,24 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf )
VK_RaySceneBegin();
for (int i = 0; i < g_render_state.num_draw_commands; ++i) {
const draw_command_t *const draw = g_render_state.draw_commands + i;
if (draw->ray_model) {
VK_RayScenePushModel( cmdbuf, draw->ray_model );
}
const vk_buffer_alloc_t *vertex_buffer = getBufferFromHandle( draw->draw.vertex_buffer );
const vk_buffer_alloc_t *index_buffer = draw->draw.index_buffer != InvalidHandle ? getBufferFromHandle( draw->draw.index_buffer ) : NULL;
const uint32_t vertex_offset = vertex_buffer->buffer_offset_in_units + draw->draw.vertex_offset;
// TODO there's a more complex story with lifetimes and rebuilds (possibly restrict this to vertex_buffer->lifetime < LifetimeSingleFrame; to be confirmed)
// TODO it would make sense to join logical models into a single ray model
// but here we've completely lost this info, as models are now just a stream
// of independent draws
const vk_ray_model_create_t ray_model_args = {
.element_count = draw->draw.element_count,
.max_vertex = vertex_buffer->count, // TODO this is an upper bound for brushes at least, it can be lowered
.index_offset = index_buffer ? index_buffer->unit_size * (draw->draw.index_offset + index_buffer->buffer_offset_in_units) : UINT32_MAX,
.vertex_offset = (draw->draw.vertex_offset + vertex_buffer->buffer_offset_in_units) * vertex_buffer->unit_size,
.buffer = g_render.buffer.buffer,
};
VK_RayScenePushModel(cmdbuf, &ray_model_args);
}
VK_RaySceneEnd( cmdbuf );
}

View File

@ -9,15 +9,17 @@
#define MAX_ACCELS 1024
#define MAX_SCRATCH_BUFFER (16*1024*1024)
#define MAX_ACCELS_BUFFER (16*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
/*
typedef struct {
//int lightmap, texture;
//int render_mode;
uint32_t element_count, vertex_count;
uint32_t element_count;
uint32_t index_offset, vertex_offset;
VkBuffer buffer;
} vk_ray_model_t;
*/
static struct {
/* VkPipelineLayout pipeline_layout; */
@ -68,11 +70,16 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons
return false;
}
vkGetAccelerationStructureBuildSizesKHR(
vkGetAccelerationStructureBuildSizesKHR(
vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info, max_prim_counts, &build_size);
gEngine.Con_Reportf(
"AS build size: %d, scratch size: %d", build_size.accelerationStructureSize, build_size.buildScratchSize);
{
uint32_t max_prims = 0;
for (int i = 0; i < n_geoms; ++i)
max_prims += max_prim_counts[i];
gEngine.Con_Reportf(
"AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize);
}
if (MAX_SCRATCH_BUFFER - g_rtx_scene.scratch_offset < build_size.buildScratchSize) {
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
@ -93,6 +100,7 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons
// TODO alignment?
g_rtx_scene.buffer_offset += build_size.accelerationStructureSize;
g_rtx_scene.buffer_offset = (g_rtx_scene.buffer_offset + 255) & ~255; // Buffer must be aligned to 256 according to spec
g_rtx_scene.num_accels++;
build_info.dstAccelerationStructure = *handle;
@ -117,38 +125,46 @@ void VK_RaySceneBegin( void )
g_rtx_scene.num_accels = 0;
}
static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle);
/*
static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle)
{
}
*/
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, vk_ray_model_handle_t model_handle )
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t *model) // _handle_t model_handle )
{
ASSERT(vk_core.rtx);
vk_ray_model_t *model = getModelByHandle(model_handle);
const VkDeviceAddress buffer_addr = getBufferDeviceAddress(model->buffer);
const VkAccelerationStructureGeometryKHR geom[] = {
{
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
.flags = VK_GEOMETRY_OPAQUE_BIT_KHR,
.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
.geometry.triangles =
(VkAccelerationStructureGeometryTrianglesDataKHR){
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
.indexType = model->index_offset == UINT32_MAX ? VK_INDEX_TYPE_NONE_KHR : VK_INDEX_TYPE_UINT16,
.maxVertex = model->vertex_count,
.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
.vertexStride = sizeof(vk_vertex_t),
.vertexData.deviceAddress = buffer_addr + model->vertex_offset,
.indexData.deviceAddress = buffer_addr + model->index_offset,
},
}};
const uint32_t max_prim_counts[ARRAYSIZE(geom)] = {model->vertex_count};
const VkAccelerationStructureBuildRangeInfoKHR build_range_tri = {
.primitiveCount = model->element_count / 3,
};
const VkAccelerationStructureBuildRangeInfoKHR *build_ranges[ARRAYSIZE(geom)] = {&build_range_tri};
{
//vk_ray_model_t *model = getModelByHandle(model_handle);
const VkDeviceAddress buffer_addr = getBufferDeviceAddress(model->buffer);
const uint32_t prim_count = model->element_count / 3;
const VkAccelerationStructureGeometryKHR geom[] = {
{
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
.flags = VK_GEOMETRY_OPAQUE_BIT_KHR,
.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
.geometry.triangles =
(VkAccelerationStructureGeometryTrianglesDataKHR){
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
.indexType = model->index_offset == UINT32_MAX ? VK_INDEX_TYPE_NONE_KHR : VK_INDEX_TYPE_UINT16,
.maxVertex = model->max_vertex,
.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
.vertexStride = sizeof(vk_vertex_t),
.vertexData.deviceAddress = buffer_addr + model->vertex_offset,
.indexData.deviceAddress = buffer_addr + model->index_offset,
},
} };
createAndBuildAccelerationStructure(cmdbuf,
geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
const uint32_t max_prim_counts[ARRAYSIZE(geom)] = { prim_count };
const VkAccelerationStructureBuildRangeInfoKHR build_range_tri = {
.primitiveCount = prim_count,
};
const VkAccelerationStructureBuildRangeInfoKHR* build_ranges[ARRAYSIZE(geom)] = { &build_range_tri };
createAndBuildAccelerationStructure(cmdbuf,
geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
}
}
void VK_RaySceneEnd( VkCommandBuffer cmdbuf )
@ -190,5 +206,7 @@ void VK_RayShutdown( void )
ASSERT(vk_core.rtx);
destroyBuffer(&g_rtx.scratch_buffer);
destroyBuffer(&g_rtx.accels_buffer);
// TODO destroy all acceleration structures (ASes) as well
}

View File

@ -5,7 +5,8 @@
typedef struct {
//int lightmap, texture;
//int render_mode;
uint32_t element_count, vertex_count;
uint32_t max_vertex;
uint32_t element_count;
uint32_t index_offset, vertex_offset;
VkBuffer buffer;
} vk_ray_model_create_t;
@ -16,7 +17,7 @@ enum { InvalidRayModel = -1 };
vk_ray_model_handle_t VK_RayModelCreate( const vk_ray_model_create_t *args );
void VK_RaySceneBegin( void );
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, vk_ray_model_handle_t model );
void VK_RayScenePushModel(VkCommandBuffer cmdbuf, const vk_ray_model_create_t* model); // vk_ray_model_handle_t model );
void VK_RaySceneEnd( VkCommandBuffer cmdbuf );
qboolean VK_RayInit( void );

View File

@ -345,7 +345,7 @@ static qboolean VK_UploadTexture(vk_texture_t *tex, rgbdata_t *pic)
{
VkMemoryRequirements memreq;
vkGetImageMemoryRequirements(vk_core.device, tex->vk.image, &memreq);
tex->vk.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
tex->vk.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
XVK_CHECK(vkBindImageMemory(vk_core.device, tex->vk.image, tex->vk.device_memory.device_memory, tex->vk.device_memory.offset));
}