rtx: fix blas creation validation/memory errors
This commit is contained in:
parent
68bce887b4
commit
c77f39e353
|
@ -16,11 +16,12 @@ qboolean createBuffer(vk_buffer_t *buf, uint32_t size, VkBufferUsageFlags usage,
|
|||
vkGetBufferMemoryRequirements(vk_core.device, buf->buffer, &memreq);
|
||||
gEngine.Con_Reportf("memreq: memoryTypeBits=0x%x alignment=%zu size=%zu\n", memreq.memoryTypeBits, memreq.alignment, memreq.size);
|
||||
|
||||
buf->device_memory = allocateDeviceMemory(memreq, flags);
|
||||
buf->device_memory = allocateDeviceMemory(memreq, flags, usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT : 0);
|
||||
XVK_CHECK(vkBindBufferMemory(vk_core.device, buf->buffer, buf->device_memory.device_memory, buf->device_memory.offset));
|
||||
|
||||
// FIXME when there are many allocations per VkDeviceMemory, fix this
|
||||
XVK_CHECK(vkMapMemory(vk_core.device, buf->device_memory.device_memory, 0, bci.size, 0, &buf->mapped));
|
||||
if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & flags)
|
||||
XVK_CHECK(vkMapMemory(vk_core.device, buf->device_memory.device_memory, 0, bci.size, 0, &buf->mapped));
|
||||
|
||||
buf->size = size;
|
||||
|
||||
|
@ -37,7 +38,9 @@ void destroyBuffer(vk_buffer_t *buf) {
|
|||
|
||||
if (buf->device_memory.device_memory)
|
||||
{
|
||||
vkUnmapMemory(vk_core.device, buf->device_memory.device_memory);
|
||||
if (buf->mapped)
|
||||
vkUnmapMemory(vk_core.device, buf->device_memory.device_memory);
|
||||
|
||||
freeDeviceMemory(&buf->device_memory);
|
||||
buf->device_memory.device_memory = VK_NULL_HANDLE;
|
||||
buf->device_memory.offset = 0;
|
||||
|
|
|
@ -41,6 +41,7 @@ static PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
|
|||
INSTANCE_FUNCS(X)
|
||||
INSTANCE_DEBUG_FUNCS(X)
|
||||
DEVICE_FUNCS(X)
|
||||
DEVICE_FUNCS_RTX(X)
|
||||
#undef X
|
||||
|
||||
static dllfunc_t nullinst_funcs[] = {
|
||||
|
@ -802,11 +803,18 @@ static uint32_t findMemoryWithType(uint32_t type_index_bits, VkMemoryPropertyFla
|
|||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props) {
|
||||
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags) {
|
||||
// TODO coalesce allocations, ...
|
||||
device_memory_t ret = {0};
|
||||
|
||||
const VkMemoryAllocateFlagsInfo mafi = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
VkMemoryAllocateInfo mai = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = flags ? &mafi : NULL,
|
||||
.allocationSize = req.size,
|
||||
.memoryTypeIndex = findMemoryWithType(req.memoryTypeBits, props),
|
||||
};
|
||||
|
|
|
@ -24,7 +24,7 @@ typedef struct device_memory_s
|
|||
uint32_t offset;
|
||||
} device_memory_t;
|
||||
|
||||
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props);
|
||||
device_memory_t allocateDeviceMemory(VkMemoryRequirements req, VkMemoryPropertyFlags props, VkMemoryAllocateFlags flags);
|
||||
void freeDeviceMemory(device_memory_t *mem);
|
||||
|
||||
typedef struct vk_buffer_s
|
||||
|
|
|
@ -77,7 +77,7 @@ static void createDepthImage(int w, int h) {
|
|||
vk_frame.depth.image = createImage(w, h, vk_frame.depth.format, tiling, usage);
|
||||
|
||||
vkGetImageMemoryRequirements(vk_core.device, vk_frame.depth.image, &memreq);
|
||||
vk_frame.depth.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
vk_frame.depth.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
|
||||
XVK_CHECK(vkBindImageMemory(vk_core.device, vk_frame.depth.image, vk_frame.depth.device_memory.device_memory, 0));
|
||||
|
||||
{
|
||||
|
|
|
@ -249,7 +249,7 @@ qboolean VK_RenderInit( void )
|
|||
|
||||
// TODO device memory and friends (e.g. handle mobile memory ...)
|
||||
|
||||
if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||
if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (vk_core.rtx ? VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : 0), VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||
return false;
|
||||
|
||||
if (!createBuffer(&g_render.uniform_buffer, g_render.uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||
|
@ -411,8 +411,6 @@ typedef struct {
|
|||
render_draw_t draw;
|
||||
uint32_t ubo_offset;
|
||||
//char debug_name[MAX_DEBUG_NAME_LENGTH];
|
||||
|
||||
vk_ray_model_handle_t ray_model;
|
||||
} draw_command_t;
|
||||
|
||||
static struct {
|
||||
|
@ -526,24 +524,6 @@ void VK_RenderScheduleDraw( const render_draw_t *draw )
|
|||
draw_command = g_render_state.draw_commands + (g_render_state.num_draw_commands++);
|
||||
draw_command->draw = *draw;
|
||||
draw_command->ubo_offset = g_render.uniform_unit_size * ubo_index;
|
||||
draw_command->ray_model = InvalidRayModel;
|
||||
|
||||
if (vk_core.rtx)
|
||||
// TODO there's a more complex story with lifetimes and rebuilds && vertex_buffer->lifetime < LifetimeSingleFrame)
|
||||
{
|
||||
// TODO it would make sense to join logical models into a single ray model
|
||||
// but here we've completely lost this info, as models are now just a stream
|
||||
// of independent draws
|
||||
const vk_ray_model_create_t ray_model_args = {
|
||||
.element_count = draw->element_count,
|
||||
.vertex_count = vertex_buffer->count,
|
||||
.index_offset = index_buffer ? index_buffer->unit_size * (draw->index_offset + index_buffer->buffer_offset_in_units) : UINT32_MAX,
|
||||
.vertex_offset = (draw->vertex_offset + vertex_buffer->buffer_offset_in_units) * vertex_buffer->unit_size,
|
||||
.buffer = g_render.buffer.buffer,
|
||||
};
|
||||
|
||||
draw_command->ray_model = VK_RayModelCreate( &ray_model_args );
|
||||
}
|
||||
}
|
||||
|
||||
void VK_RenderEnd( VkCommandBuffer cmdbuf )
|
||||
|
@ -626,9 +606,24 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf )
|
|||
VK_RaySceneBegin();
|
||||
for (int i = 0; i < g_render_state.num_draw_commands; ++i) {
|
||||
const draw_command_t *const draw = g_render_state.draw_commands + i;
|
||||
if (draw->ray_model) {
|
||||
VK_RayScenePushModel( cmdbuf, draw->ray_model );
|
||||
}
|
||||
const vk_buffer_alloc_t *vertex_buffer = getBufferFromHandle( draw->draw.vertex_buffer );
|
||||
const vk_buffer_alloc_t *index_buffer = draw->draw.index_buffer != InvalidHandle ? getBufferFromHandle( draw->draw.index_buffer ) : NULL;
|
||||
const uint32_t vertex_offset = vertex_buffer->buffer_offset_in_units + draw->draw.vertex_offset;
|
||||
|
||||
// TODO there's a more complex story with lifetimes and rebuilds (cf. `vertex_buffer->lifetime < LifetimeSingleFrame`)
|
||||
// TODO it would make sense to join logical models into a single ray model
|
||||
// but here we've completely lost this info, as models are now just a stream
|
||||
// of independent draws
|
||||
|
||||
const vk_ray_model_create_t ray_model_args = {
|
||||
.element_count = draw->draw.element_count,
|
||||
.max_vertex = vertex_buffer->count, // TODO this is an upper bound for brushes at least, it can be lowered
|
||||
.index_offset = index_buffer ? index_buffer->unit_size * (draw->draw.index_offset + index_buffer->buffer_offset_in_units) : UINT32_MAX,
|
||||
.vertex_offset = (draw->draw.vertex_offset + vertex_buffer->buffer_offset_in_units) * vertex_buffer->unit_size,
|
||||
.buffer = g_render.buffer.buffer,
|
||||
};
|
||||
|
||||
VK_RayScenePushModel(cmdbuf, &ray_model_args);
|
||||
}
|
||||
VK_RaySceneEnd( cmdbuf );
|
||||
}
|
||||
|
|
|
@ -9,15 +9,17 @@
|
|||
|
||||
#define MAX_ACCELS 1024
|
||||
#define MAX_SCRATCH_BUFFER (16*1024*1024)
|
||||
#define MAX_ACCELS_BUFFER (16*1024*1024)
|
||||
#define MAX_ACCELS_BUFFER (64*1024*1024)
|
||||
|
||||
/*
|
||||
typedef struct {
|
||||
//int lightmap, texture;
|
||||
//int render_mode;
|
||||
uint32_t element_count, vertex_count;
|
||||
uint32_t element_count;
|
||||
uint32_t index_offset, vertex_offset;
|
||||
VkBuffer buffer;
|
||||
} vk_ray_model_t;
|
||||
*/
|
||||
|
||||
static struct {
|
||||
/* VkPipelineLayout pipeline_layout; */
|
||||
|
@ -68,11 +70,16 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons
|
|||
return false;
|
||||
}
|
||||
|
||||
vkGetAccelerationStructureBuildSizesKHR(
|
||||
vkGetAccelerationStructureBuildSizesKHR(
|
||||
vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info, max_prim_counts, &build_size);
|
||||
|
||||
gEngine.Con_Reportf(
|
||||
"AS build size: %d, scratch size: %d", build_size.accelerationStructureSize, build_size.buildScratchSize);
|
||||
{
|
||||
uint32_t max_prims = 0;
|
||||
for (int i = 0; i < n_geoms; ++i)
|
||||
max_prims += max_prim_counts[i];
|
||||
gEngine.Con_Reportf(
|
||||
"AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize);
|
||||
}
|
||||
|
||||
if (MAX_SCRATCH_BUFFER - g_rtx_scene.scratch_offset < build_size.buildScratchSize) {
|
||||
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
|
||||
|
@ -93,6 +100,7 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons
|
|||
|
||||
// TODO alignment?
|
||||
g_rtx_scene.buffer_offset += build_size.accelerationStructureSize;
|
||||
g_rtx_scene.buffer_offset = (g_rtx_scene.buffer_offset + 255) & ~255; // Buffer must be aligned to 256 according to spec
|
||||
g_rtx_scene.num_accels++;
|
||||
|
||||
build_info.dstAccelerationStructure = *handle;
|
||||
|
@ -117,38 +125,46 @@ void VK_RaySceneBegin( void )
|
|||
g_rtx_scene.num_accels = 0;
|
||||
}
|
||||
|
||||
static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle);
|
||||
/*
|
||||
static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle)
|
||||
{
|
||||
}
|
||||
*/
|
||||
|
||||
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, vk_ray_model_handle_t model_handle )
|
||||
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t *model) // _handle_t model_handle )
|
||||
{
|
||||
ASSERT(vk_core.rtx);
|
||||
vk_ray_model_t *model = getModelByHandle(model_handle);
|
||||
const VkDeviceAddress buffer_addr = getBufferDeviceAddress(model->buffer);
|
||||
const VkAccelerationStructureGeometryKHR geom[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
|
||||
.flags = VK_GEOMETRY_OPAQUE_BIT_KHR,
|
||||
.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
|
||||
.geometry.triangles =
|
||||
(VkAccelerationStructureGeometryTrianglesDataKHR){
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
|
||||
.indexType = model->index_offset == UINT32_MAX ? VK_INDEX_TYPE_NONE_KHR : VK_INDEX_TYPE_UINT16,
|
||||
.maxVertex = model->vertex_count,
|
||||
.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
|
||||
.vertexStride = sizeof(vk_vertex_t),
|
||||
.vertexData.deviceAddress = buffer_addr + model->vertex_offset,
|
||||
.indexData.deviceAddress = buffer_addr + model->index_offset,
|
||||
},
|
||||
}};
|
||||
|
||||
const uint32_t max_prim_counts[ARRAYSIZE(geom)] = {model->vertex_count};
|
||||
const VkAccelerationStructureBuildRangeInfoKHR build_range_tri = {
|
||||
.primitiveCount = model->element_count / 3,
|
||||
};
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *build_ranges[ARRAYSIZE(geom)] = {&build_range_tri};
|
||||
{
|
||||
//vk_ray_model_t *model = getModelByHandle(model_handle);
|
||||
const VkDeviceAddress buffer_addr = getBufferDeviceAddress(model->buffer);
|
||||
const uint32_t prim_count = model->element_count / 3;
|
||||
const VkAccelerationStructureGeometryKHR geom[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
|
||||
.flags = VK_GEOMETRY_OPAQUE_BIT_KHR,
|
||||
.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR,
|
||||
.geometry.triangles =
|
||||
(VkAccelerationStructureGeometryTrianglesDataKHR){
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
|
||||
.indexType = model->index_offset == UINT32_MAX ? VK_INDEX_TYPE_NONE_KHR : VK_INDEX_TYPE_UINT16,
|
||||
.maxVertex = model->max_vertex,
|
||||
.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
|
||||
.vertexStride = sizeof(vk_vertex_t),
|
||||
.vertexData.deviceAddress = buffer_addr + model->vertex_offset,
|
||||
.indexData.deviceAddress = buffer_addr + model->index_offset,
|
||||
},
|
||||
} };
|
||||
|
||||
createAndBuildAccelerationStructure(cmdbuf,
|
||||
geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
|
||||
const uint32_t max_prim_counts[ARRAYSIZE(geom)] = { prim_count };
|
||||
const VkAccelerationStructureBuildRangeInfoKHR build_range_tri = {
|
||||
.primitiveCount = prim_count,
|
||||
};
|
||||
const VkAccelerationStructureBuildRangeInfoKHR* build_ranges[ARRAYSIZE(geom)] = { &build_range_tri };
|
||||
|
||||
createAndBuildAccelerationStructure(cmdbuf,
|
||||
geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
|
||||
}
|
||||
}
|
||||
|
||||
void VK_RaySceneEnd( VkCommandBuffer cmdbuf )
|
||||
|
@ -190,5 +206,7 @@ void VK_RayShutdown( void )
|
|||
ASSERT(vk_core.rtx);
|
||||
destroyBuffer(&g_rtx.scratch_buffer);
|
||||
destroyBuffer(&g_rtx.accels_buffer);
|
||||
|
||||
// TODO dealloc all ASes
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
typedef struct {
|
||||
//int lightmap, texture;
|
||||
//int render_mode;
|
||||
uint32_t element_count, vertex_count;
|
||||
uint32_t max_vertex;
|
||||
uint32_t element_count;
|
||||
uint32_t index_offset, vertex_offset;
|
||||
VkBuffer buffer;
|
||||
} vk_ray_model_create_t;
|
||||
|
@ -16,7 +17,7 @@ enum { InvalidRayModel = -1 };
|
|||
vk_ray_model_handle_t VK_RayModelCreate( const vk_ray_model_create_t *args );
|
||||
|
||||
void VK_RaySceneBegin( void );
|
||||
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, vk_ray_model_handle_t model );
|
||||
void VK_RayScenePushModel(VkCommandBuffer cmdbuf, const vk_ray_model_create_t* model); // vk_ray_model_handle_t model );
|
||||
void VK_RaySceneEnd( VkCommandBuffer cmdbuf );
|
||||
|
||||
qboolean VK_RayInit( void );
|
||||
|
|
|
@ -345,7 +345,7 @@ static qboolean VK_UploadTexture(vk_texture_t *tex, rgbdata_t *pic)
|
|||
{
|
||||
VkMemoryRequirements memreq;
|
||||
vkGetImageMemoryRequirements(vk_core.device, tex->vk.image, &memreq);
|
||||
tex->vk.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
tex->vk.device_memory = allocateDeviceMemory(memreq, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0);
|
||||
XVK_CHECK(vkBindImageMemory(vk_core.device, tex->vk.image, tex->vk.device_memory.device_memory, tex->vk.device_memory.offset));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue