rtx: fix corrupted geometry

The TLAS creation pattern ended up allocating memory in a temporary region. That region was later reused for BLAS data, which was then corrupted by TLAS rebuilds.

Also fixed a memory leak, added a freeze-models command, added model debug code to shaders, etc.
This commit is contained in:
Ivan 'provod' Avdeev 2021-05-08 14:34:42 -07:00
parent 354eef1c0a
commit 5ddeb6853d
3 changed files with 110 additions and 14 deletions

View File

@ -1,8 +1,14 @@
## 2021-05-08, E92
- [x] rtx: weird purple bbox-like glitches on dynamic geometry (tlas vs blas memory corruption/aliasing)
# Next
- [ ] make map/frame lifetime aware allocator and use it everywhere: render, rtx buffers, etc
- [ ] rtx: improve AS lifetime/management; i.e. pre-cache them, etc
- [ ] add debug names to all of the buffers
- [ ] use NVIDIA Nsight to inspect buffer memory and stuff
- [ ] rtx: build acceleration structures in a single queue/cmdbuf
- [ ] studio models: pre-compute buffer sizes and allocate them at once
- [ ] studio models: fix lighting: should have white texture instead of lightmap
- [ ] rtx: dynamic models AS caching
- [ ] rtx: weird purple bbox-like glitches on dynamic geometry
- [ ] rtx: better memory handling
- [ ] robust tracking of memory hierarchies: global/static, map, frame
- or just do a generic allocator with compaction?
@ -225,3 +231,6 @@
## 2021-05-01, E89
- [x] make a wrapper for descriptor sets/layouts
## 2021-05-03, E90
- [x] make map/frame lifetime aware allocator and use it everywhere: render, rtx buffers, etc

View File

@ -150,6 +150,32 @@ layout (push_constant) uniform PC {
//uint picked_light = 76;//uint(mod(pc.t * 4., emissive_kusochki.num_kusochki));
int time_off = int(pc.t * 8.);
float hash(float f) { return fract(sin(f)*53478.4327); }
float printTiledNumber(vec2 p, int n) {
if (n == 0) return 0.;
float t = pc.t;
float x = floor(p.x / 5. / 2.);
//p.y += 12. * fract(pc.t * (4. + 3. * hash(x)));
p = floor(p / 2.);
vec2 pc = floor(p / vec2(5.,6.));
vec2 pg = mod(p, vec2(5.,6.));
float lx = 1.;
float col = 0.;
#define PUTN(n) printInt(n,pg,pc,lx,col)
// float ncol = floor(pc.x / 3.);
// float tlen = floor(16. + 32. * hash(ncol));
// pc.y = mod(pc.y + floor(t * (6. + 9. * hash(ncol))), tlen);
// if (pc.y > tlen * .6) return 0.;
pc.y = mod(pc.y, 2.);
pc.x = mod(pc.x, 3.);
PUTN(n);
return col;
}
float printText(in vec2 p) {
#define PIXSZ 4.
p = floor(p / PIXSZ);
@ -244,15 +270,15 @@ float rand01() {
return uintBitsToFloat(0x3f800000 | (rand() & 0x007fffff)) - 1.;
}
float hash(float f) { return fract(sin(f)*53478.4327); }
bool shadowed(vec3 pos, vec3 dir, float dist) {
rayQueryEXT shadowRayQuery;
rayQueryInitializeEXT(shadowRayQuery, tlas,
gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT,
0xff,
pos, 0., dir, dist);
while(rayQueryProceedEXT(shadowRayQuery)) {}
while(rayQueryProceedEXT(shadowRayQuery)) {
rayQueryConfirmIntersectionEXT(shadowRayQuery);
}
return rayQueryGetIntersectionTypeEXT(shadowRayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT;
}
@ -274,8 +300,18 @@ void main() {
for (int bounce = 0; bounce < pc.bounces; ++bounce) {
rayQueryEXT rayQuery;
rayQueryInitializeEXT(rayQuery, tlas, gl_RayFlagsOpaqueEXT, 0xff, O, 0., D, L);
while(rayQueryProceedEXT(rayQuery)) {}
while(rayQueryProceedEXT(rayQuery)) {
rayQueryConfirmIntersectionEXT(rayQuery);
}
const float l = rayQueryGetIntersectionTEXT(rayQuery, true);
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionGeneratedEXT) {
C += kc * vec3(0., 1., 0.);
break;
}
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionNoneEXT) {
C += kc * vec3(0., 0., 0.);
break;
}
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionTriangleEXT) {
C += kc * vec3(1., 0., 1.);
break;
@ -284,14 +320,20 @@ void main() {
vec3 pos = O+D*l;
const int instance_kusochki_offset = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true);
//const int instance_index = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true);
const int instance_index = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true);
const int geom_index = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true);
const int kusok_index = instance_kusochki_offset + geom_index;
const Kusok kusok = kusochki[kusok_index];
//const uint leaf = kusochki[kusok_index].leaf-1;
//C = fract(pos / LIGHT_GRID_CELL_SIZE); break;
//C = vec3(hash(float(geom_index)), hash(float(geom_index)+15.43), hash(float(geom_index)+34.));
//C = vec3(hash(float(geom_index)), hash(float(geom_index)+15.43), hash(float(geom_index)+34.)); break;
// C = vec3(hash(float(instance_index)), hash(float(instance_index)+15.43), hash(float(instance_index)+34.)) + .1 * fract(pos/LIGHT_GRID_CELL_SIZE);
// vec2 pix = vec2(1.,-1.) * vec2(gl_GlobalInvocationID.xy) + vec2(0., imageSize(image).y);
// C = mix(C*.5, vec3(0., 1., 0.), printTiledNumber(pix, instance_index));
// break;
//C = vec3(hash(float(leaf)), hash(float(leaf)+15.43), hash(float(leaf)+34.));
//C = vec3(hash(float(leaf)), float(kusok.num_surface_lights) / 63., float(kusok.is_emissive));
//break;

View File

@ -166,6 +166,7 @@ static struct {
vk_image_t frames[2];
qboolean reload_pipeline;
qboolean freeze_models;
// HACK: we don't have a way to properly destroy all models and their Vulkan objects on shutdown.
// This makes validation layers unhappy. Remember created objects here and destroy them manually.
@ -229,7 +230,7 @@ static qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, cons
"AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, args->n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize);
}
if (MAX_SCRATCH_BUFFER - g_rtx.frame.scratch_offset < scratch_buffer_size) {
if (MAX_SCRATCH_BUFFER < g_rtx.frame.scratch_offset + scratch_buffer_size) {
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
MAX_SCRATCH_BUFFER - g_rtx.frame.scratch_offset,
scratch_buffer_size);
@ -253,11 +254,17 @@ static qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, cons
}
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->accel));
// gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->accel, args->n_geoms, buffer_offset, build_size.accelerationStructureSize, buffer_offset + build_size.accelerationStructureSize);
}
build_info.dstAccelerationStructure = *args->accel;
build_info.scratchData.deviceAddress = g_rtx.scratch_buffer_addr + g_rtx.frame.scratch_offset;
uint32_t scratch_offset_initial = g_rtx.frame.scratch_offset;
g_rtx.frame.scratch_offset += scratch_buffer_size;
g_rtx.frame.scratch_offset = ALIGN_UP(g_rtx.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment);
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
vkCmdBuildAccelerationStructuresKHR(cmdbuf, 1, &build_info, args->build_ranges);
return true;
@ -297,6 +304,9 @@ void VK_RayFrameBegin( void )
{
ASSERT(vk_core.rtx);
if (g_rtx.freeze_models)
return;
// FIXME we depend on the fact that only a single frame can be in flight
// currently framectl waits for the queue to complete before returning
// so we can be sure here that previous frame is complete and we're free to
@ -305,17 +315,26 @@ void VK_RayFrameBegin( void )
vk_ray_model_t *model = g_rtx.frame.models + i;
if (!model->dynamic)
continue;
if (model->accel == NULL)
continue;
// TODO cache and reuse
for (int j = 0; j < ARRAYSIZE(g_rtx.blases); ++j) {
if (g_rtx.blases[j] == model->accel) {
//gEngine.Con_Reportf("FrameBegin: frame model %d destroying AS=%p blas_index=%d\n", i, model->accel, j);
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.blases[j], NULL);
g_rtx.blases[j] = VK_NULL_HANDLE;
model->accel = VK_NULL_HANDLE;
break;
}
}
}
if (g_rtx.tlas != VK_NULL_HANDLE) {
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.tlas, NULL);
g_rtx.tlas = VK_NULL_HANDLE;
}
g_rtx.frame.scratch_offset = 0;
g_rtx.frame.num_models = 0;
g_rtx.frame.num_lighttextures = 0;
@ -448,6 +467,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
for (int i = 0; i < g_rtx.frame.num_models; ++i) {
const vk_ray_model_t* const model = g_rtx.frame.models + i;
ASSERT(model->accel != VK_NULL_HANDLE);
//gEngine.Con_Reportf(" %d: AS=%p\n", i, model->accel);
inst[i] = (VkAccelerationStructureInstanceKHR){
.instanceCustomIndex = model->kusochki_offset,
.mask = 0xff,
@ -492,7 +512,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
},
},
};
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = { MAX_ACCELS };
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = { g_rtx.frame.num_models };
const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
.primitiveCount = g_rtx.frame.num_models,
};
@ -503,7 +523,8 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
.build_ranges = tl_build_ranges,
.n_geoms = ARRAYSIZE(tl_geom),
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
.dynamic = true,
// we can't really rebuild TLAS because instance count changes are not allowed .dynamic = true,
.dynamic = false,
.accel = &g_rtx.tlas,
};
if (!createOrUpdateAccelerationStructure(cmdbuf, &asrgs)) {
@ -794,6 +815,10 @@ static void reloadPipeline( void ) {
g_rtx.reload_pipeline = true;
}
static void freezeModels( void ) {
g_rtx.freeze_models = !g_rtx.freeze_models;
}
qboolean VK_RayInit( void )
{
ASSERT(vk_core.rtx);
@ -896,8 +921,10 @@ qboolean VK_RayInit( void )
}
}
if (vk_core.debug)
if (vk_core.debug) {
gEngine.Cmd_AddCommand("vk_rtx_reload", reloadPipeline, "Reload RTX shader");
gEngine.Cmd_AddCommand("vk_rtx_freeze", freezeModels, "Freeze models, do not update/add/delete models from to-draw list");
}
return true;
}
@ -940,14 +967,18 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
ASSERT(vk_core.rtx);
if (g_rtx.freeze_models)
return;
if (kusochki_count_offset == AllocFailed) {
gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded on model %s\n", args.model->debug_name);
return false;
}
geoms = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms));
// FIXME don't touch allocator each frame many times pls
geoms = Mem_Calloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms));
geom_max_prim_counts = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_max_prim_counts));
geom_build_ranges = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
geom_build_ranges = Mem_Calloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
geom_build_ranges_ptr = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
kusochki = (vk_kusok_data_t*)(g_rtx.kusochki_buffer.mapped) + kusochki_count_offset;
@ -980,6 +1011,10 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
},
};
// gEngine.Con_Printf(" g%d: v(%#x %d %#x) V%d i(%#x %d %#x) I%d\n", i,
// vertex_offset*sizeof(vk_vertex_t), mg->vertex_count * sizeof(vk_vertex_t), (vertex_offset + mg->vertex_count) * sizeof(vk_vertex_t), mg->vertex_count,
// index_offset*sizeof(uint16_t), mg->element_count * sizeof(uint16_t), (index_offset + mg->element_count) * sizeof(uint16_t), mg->element_count);
geom_build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) {
.primitiveCount = prim_count,
};
@ -1040,6 +1075,7 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
g_rtx.frame.scratch_offset = 0;
}
Mem_Free(geom_build_ranges_ptr);
Mem_Free(geom_build_ranges);
Mem_Free(geom_max_prim_counts);
Mem_Free(geoms);
@ -1053,6 +1089,8 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
}
}
// gEngine.Con_Reportf("Model %s generated AS=%p blas_index=%d\n", args.model->debug_name, args.model->rtx.blas, blas_index);
if (blas_index == ARRAYSIZE(g_rtx.blases))
gEngine.Con_Printf(S_WARN "Too many BLASes created :(\n");
}
@ -1063,6 +1101,8 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
}
void VK_RayModelDestroy( struct vk_render_model_s *model ) {
ASSERT(!g_rtx.freeze_models);
ASSERT(vk_core.rtx);
if (model->rtx.blas != VK_NULL_HANDLE) {
int blas_index;
@ -1075,6 +1115,8 @@ void VK_RayModelDestroy( struct vk_render_model_s *model ) {
if (blas_index == ARRAYSIZE(g_rtx.blases))
gEngine.Con_Printf(S_WARN "Model BLAS was missing\n");
// gEngine.Con_Reportf("Model %s destroying AS=%p blas_index=%d\n", model->debug_name, model->rtx.blas, blas_index);
vkDestroyAccelerationStructureKHR(vk_core.device, model->rtx.blas, NULL);
model->rtx.blas = VK_NULL_HANDLE;
}
@ -1085,6 +1127,9 @@ void VK_RayFrameAddModel( const struct vk_render_model_s *model, const matrix3x4
ASSERT(g_rtx.frame.num_models <= ARRAYSIZE(g_rtx.frame.models));
if (g_rtx.freeze_models)
return;
if (g_rtx.frame.num_models == ARRAYSIZE(g_rtx.frame.models)) {
gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n");
return;