mirror of
https://github.com/w23/xash3d-fwgs
synced 2024-12-16 22:20:01 +01:00
rtx: fix corrupted geometry
The TLAS creation pattern ended up allocating memory in a temporary region. That memory was later reused for BLAS data and subsequently corrupted by a TLAS rebuild. Also fixed a memory leak, added a freeze-models command, added model debug code to the shaders, etc.
This commit is contained in:
parent
354eef1c0a
commit
5ddeb6853d
@ -1,8 +1,14 @@
|
||||
## 2021-05-08, E92
|
||||
- [x] rtx: weird purple bbox-like glitches on dynamic geometry (tlas vs blas memory corruption/aliasing)
|
||||
|
||||
# Next
|
||||
- [ ] make map/frame lifetime aware allocator and use it everywhere: render, rtx buffers, etc
|
||||
- [ ] rtx: improve AS lifetime/management; i.e. pre-cache them, etc
|
||||
- [ ] add debug names to all of the buffers
|
||||
- [ ] use NVIDIA Nsight to look into buffer memory and stuff
|
||||
- [ ] rtx: build acceleration structures in a single queue/cmdbuf
|
||||
- [ ] studio models: pre-compute buffer sizes and allocate them at once
|
||||
- [ ] studio models: fix lighting: should have white texture instead of lightmap
|
||||
- [ ] rtx: dynamic models AS caching
|
||||
- [ ] rtx: weird purple bbox-like glitches on dynamic geometry
|
||||
- [ ] rtx: better memory handling
|
||||
- [ ] robust tracking of memory hierarchies: global/static, map, frame
|
||||
- or just do a generic allocator with compaction?
|
||||
@ -225,3 +231,6 @@
|
||||
|
||||
## 2021-05-01, E89
|
||||
- [x] make a wrapper for descriptor sets/layouts
|
||||
|
||||
## 2021-05-03, E90
|
||||
- [x] make map/frame lifetime aware allocator and use it everywhere: render, rtx buffers, etc
|
||||
|
@ -150,6 +150,32 @@ layout (push_constant) uniform PC {
|
||||
//uint picked_light = 76;//uint(mod(pc.t * 4., emissive_kusochki.num_kusochki));
|
||||
int time_off = int(pc.t * 8.);
|
||||
|
||||
// Cheap float hash: scales sin(f) by a large constant and keeps only the
// fractional part, so the result lies in [0, 1).
float hash(float f) { return fract(sin(f)*53478.4327); }
|
||||
|
||||
// Debug helper: evaluates a repeating (tiled) rendering of the number `n` at
// coordinate `p` and returns the value accumulated in `col`.
// Returns 0 immediately when n == 0.
// The commented-out call site in main() uses the result as a mix() factor.
// NOTE(review): printInt is not visible in this hunk; presumably it writes the
// digits of `n` into `col` -- confirm against its definition.
float printTiledNumber(vec2 p, int n) {
|
||||
if (n == 0) return 0.;
|
||||
// NOTE: `t` and `x` are only referenced by the commented-out experiments below.
float t = pc.t;
|
||||
float x = floor(p.x / 5. / 2.);
|
||||
//p.y += 12. * fract(pc.t * (4. + 3. * hash(x)));
|
||||
// Quantize the coordinate to steps of 2 input units.
p = floor(p / 2.);
|
||||
// Split the quantized coordinate into a grid of 5x6 cells:
// `pc` is the cell index, `pg` is the position inside the cell.
// NOTE: this local `pc` shadows the `pc` whose `.t` member is read above.
vec2 pc = floor(p / vec2(5.,6.));
|
||||
vec2 pg = mod(p, vec2(5.,6.));
|
||||
float lx = 1.;
|
||||
float col = 0.;
|
||||
|
||||
// Shorthand that forwards the local cell/position/accumulator state to printInt.
#define PUTN(n) printInt(n,pg,pc,lx,col)
|
||||
|
||||
// float ncol = floor(pc.x / 3.);
|
||||
// float tlen = floor(16. + 32. * hash(ncol));
|
||||
// pc.y = mod(pc.y + floor(t * (6. + 9. * hash(ncol))), tlen);
|
||||
// if (pc.y > tlen * .6) return 0.;
|
||||
|
||||
// Wrap the cell index so the pattern repeats every 3 cells in x and 2 cells in y.
pc.y = mod(pc.y, 2.);
|
||||
pc.x = mod(pc.x, 3.);
|
||||
PUTN(n);
|
||||
return col;
|
||||
}
|
||||
|
||||
float printText(in vec2 p) {
|
||||
#define PIXSZ 4.
|
||||
p = floor(p / PIXSZ);
|
||||
@ -244,15 +270,15 @@ float rand01() {
|
||||
return uintBitsToFloat(0x3f800000 | (rand() & 0x007fffff)) - 1.;
|
||||
}
|
||||
|
||||
float hash(float f) { return fract(sin(f)*53478.4327); }
|
||||
|
||||
bool shadowed(vec3 pos, vec3 dir, float dist) {
|
||||
rayQueryEXT shadowRayQuery;
|
||||
rayQueryInitializeEXT(shadowRayQuery, tlas,
|
||||
gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT,
|
||||
0xff,
|
||||
pos, 0., dir, dist);
|
||||
while(rayQueryProceedEXT(shadowRayQuery)) {}
|
||||
while(rayQueryProceedEXT(shadowRayQuery)) {
|
||||
rayQueryConfirmIntersectionEXT(shadowRayQuery);
|
||||
}
|
||||
return rayQueryGetIntersectionTypeEXT(shadowRayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT;
|
||||
}
|
||||
|
||||
@ -274,8 +300,18 @@ void main() {
|
||||
for (int bounce = 0; bounce < pc.bounces; ++bounce) {
|
||||
rayQueryEXT rayQuery;
|
||||
rayQueryInitializeEXT(rayQuery, tlas, gl_RayFlagsOpaqueEXT, 0xff, O, 0., D, L);
|
||||
while(rayQueryProceedEXT(rayQuery)) {}
|
||||
while(rayQueryProceedEXT(rayQuery)) {
|
||||
rayQueryConfirmIntersectionEXT(rayQuery);
|
||||
}
|
||||
const float l = rayQueryGetIntersectionTEXT(rayQuery, true);
|
||||
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionGeneratedEXT) {
|
||||
C += kc * vec3(0., 1., 0.);
|
||||
break;
|
||||
}
|
||||
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionNoneEXT) {
|
||||
C += kc * vec3(0., 0., 0.);
|
||||
break;
|
||||
}
|
||||
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionTriangleEXT) {
|
||||
C += kc * vec3(1., 0., 1.);
|
||||
break;
|
||||
@ -284,14 +320,20 @@ void main() {
|
||||
vec3 pos = O+D*l;
|
||||
|
||||
const int instance_kusochki_offset = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true);
|
||||
//const int instance_index = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true);
|
||||
const int instance_index = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true);
|
||||
const int geom_index = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true);
|
||||
const int kusok_index = instance_kusochki_offset + geom_index;
|
||||
const Kusok kusok = kusochki[kusok_index];
|
||||
//const uint leaf = kusochki[kusok_index].leaf-1;
|
||||
|
||||
//C = fract(pos / LIGHT_GRID_CELL_SIZE); break;
|
||||
//C = vec3(hash(float(geom_index)), hash(float(geom_index)+15.43), hash(float(geom_index)+34.));
|
||||
//C = vec3(hash(float(geom_index)), hash(float(geom_index)+15.43), hash(float(geom_index)+34.)); break;
|
||||
|
||||
// C = vec3(hash(float(instance_index)), hash(float(instance_index)+15.43), hash(float(instance_index)+34.)) + .1 * fract(pos/LIGHT_GRID_CELL_SIZE);
|
||||
// vec2 pix = vec2(1.,-1.) * vec2(gl_GlobalInvocationID.xy) + vec2(0., imageSize(image).y);
|
||||
// C = mix(C*.5, vec3(0., 1., 0.), printTiledNumber(pix, instance_index));
|
||||
// break;
|
||||
|
||||
//C = vec3(hash(float(leaf)), hash(float(leaf)+15.43), hash(float(leaf)+34.));
|
||||
//C = vec3(hash(float(leaf)), float(kusok.num_surface_lights) / 63., float(kusok.is_emissive));
|
||||
//break;
|
||||
|
@ -166,6 +166,7 @@ static struct {
|
||||
vk_image_t frames[2];
|
||||
|
||||
qboolean reload_pipeline;
|
||||
qboolean freeze_models;
|
||||
|
||||
// HACK: we don't have a way to properly destroy all models and their Vulkan objects on shutdown.
|
||||
// This makes validation layers unhappy. Remember created objects here and destroy them manually.
|
||||
@ -229,7 +230,7 @@ static qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, cons
|
||||
"AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, args->n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize);
|
||||
}
|
||||
|
||||
if (MAX_SCRATCH_BUFFER - g_rtx.frame.scratch_offset < scratch_buffer_size) {
|
||||
if (MAX_SCRATCH_BUFFER < g_rtx.frame.scratch_offset + scratch_buffer_size) {
|
||||
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
|
||||
MAX_SCRATCH_BUFFER - g_rtx.frame.scratch_offset,
|
||||
scratch_buffer_size);
|
||||
@ -253,11 +254,17 @@ static qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, cons
|
||||
}
|
||||
|
||||
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->accel));
|
||||
|
||||
// gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->accel, args->n_geoms, buffer_offset, build_size.accelerationStructureSize, buffer_offset + build_size.accelerationStructureSize);
|
||||
}
|
||||
|
||||
build_info.dstAccelerationStructure = *args->accel;
|
||||
build_info.scratchData.deviceAddress = g_rtx.scratch_buffer_addr + g_rtx.frame.scratch_offset;
|
||||
uint32_t scratch_offset_initial = g_rtx.frame.scratch_offset;
|
||||
g_rtx.frame.scratch_offset += scratch_buffer_size;
|
||||
g_rtx.frame.scratch_offset = ALIGN_UP(g_rtx.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment);
|
||||
|
||||
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
|
||||
|
||||
vkCmdBuildAccelerationStructuresKHR(cmdbuf, 1, &build_info, args->build_ranges);
|
||||
return true;
|
||||
@ -297,6 +304,9 @@ void VK_RayFrameBegin( void )
|
||||
{
|
||||
ASSERT(vk_core.rtx);
|
||||
|
||||
if (g_rtx.freeze_models)
|
||||
return;
|
||||
|
||||
// FIXME we depend on the fact that only a single frame can be in flight
|
||||
// currently framectl waits for the queue to complete before returning
|
||||
// so we can be sure here that previous frame is complete and we're free to
|
||||
@ -305,17 +315,26 @@ void VK_RayFrameBegin( void )
|
||||
vk_ray_model_t *model = g_rtx.frame.models + i;
|
||||
if (!model->dynamic)
|
||||
continue;
|
||||
if (model->accel == NULL)
|
||||
continue;
|
||||
|
||||
// TODO cache and reuse
|
||||
for (int j = 0; j < ARRAYSIZE(g_rtx.blases); ++j) {
|
||||
if (g_rtx.blases[j] == model->accel) {
|
||||
//gEngine.Con_Reportf("FrameBegin: frame model %d destroying AS=%p blas_index=%d\n", i, model->accel, j);
|
||||
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.blases[j], NULL);
|
||||
g_rtx.blases[j] = VK_NULL_HANDLE;
|
||||
model->accel = VK_NULL_HANDLE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (g_rtx.tlas != VK_NULL_HANDLE) {
|
||||
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.tlas, NULL);
|
||||
g_rtx.tlas = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
g_rtx.frame.scratch_offset = 0;
|
||||
g_rtx.frame.num_models = 0;
|
||||
g_rtx.frame.num_lighttextures = 0;
|
||||
@ -448,6 +467,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
|
||||
for (int i = 0; i < g_rtx.frame.num_models; ++i) {
|
||||
const vk_ray_model_t* const model = g_rtx.frame.models + i;
|
||||
ASSERT(model->accel != VK_NULL_HANDLE);
|
||||
//gEngine.Con_Reportf(" %d: AS=%p\n", i, model->accel);
|
||||
inst[i] = (VkAccelerationStructureInstanceKHR){
|
||||
.instanceCustomIndex = model->kusochki_offset,
|
||||
.mask = 0xff,
|
||||
@ -492,7 +512,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
|
||||
},
|
||||
},
|
||||
};
|
||||
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = { MAX_ACCELS };
|
||||
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = { g_rtx.frame.num_models };
|
||||
const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
|
||||
.primitiveCount = g_rtx.frame.num_models,
|
||||
};
|
||||
@ -503,7 +523,8 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
|
||||
.build_ranges = tl_build_ranges,
|
||||
.n_geoms = ARRAYSIZE(tl_geom),
|
||||
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
|
||||
.dynamic = true,
|
||||
// we can't really rebuild TLAS because instance count changes are not allowed .dynamic = true,
|
||||
.dynamic = false,
|
||||
.accel = &g_rtx.tlas,
|
||||
};
|
||||
if (!createOrUpdateAccelerationStructure(cmdbuf, &asrgs)) {
|
||||
@ -794,6 +815,10 @@ static void reloadPipeline( void ) {
|
||||
g_rtx.reload_pipeline = true;
|
||||
}
|
||||
|
||||
// Console command handler (registered as "vk_rtx_freeze" in VK_RayInit when
// vk_core.debug is set): flips g_rtx.freeze_models on every invocation.
// While the flag is set, VK_RayFrameBegin, VK_RayModelInit and
// VK_RayFrameAddModel return early, and VK_RayModelDestroy asserts that it is clear.
static void freezeModels( void ) {
|
||||
g_rtx.freeze_models = !g_rtx.freeze_models;
|
||||
}
|
||||
|
||||
qboolean VK_RayInit( void )
|
||||
{
|
||||
ASSERT(vk_core.rtx);
|
||||
@ -896,8 +921,10 @@ qboolean VK_RayInit( void )
|
||||
}
|
||||
}
|
||||
|
||||
if (vk_core.debug)
|
||||
if (vk_core.debug) {
|
||||
gEngine.Cmd_AddCommand("vk_rtx_reload", reloadPipeline, "Reload RTX shader");
|
||||
gEngine.Cmd_AddCommand("vk_rtx_freeze", freezeModels, "Freeze models, do not update/add/delete models from to-draw list");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -940,14 +967,18 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
|
||||
|
||||
ASSERT(vk_core.rtx);
|
||||
|
||||
if (g_rtx.freeze_models)
|
||||
return;
|
||||
|
||||
if (kusochki_count_offset == AllocFailed) {
|
||||
gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded on model %s\n", args.model->debug_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
geoms = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms));
|
||||
// FIXME don't touch allocator each frame many times pls
|
||||
geoms = Mem_Calloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms));
|
||||
geom_max_prim_counts = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_max_prim_counts));
|
||||
geom_build_ranges = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
|
||||
geom_build_ranges = Mem_Calloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
|
||||
geom_build_ranges_ptr = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
|
||||
|
||||
kusochki = (vk_kusok_data_t*)(g_rtx.kusochki_buffer.mapped) + kusochki_count_offset;
|
||||
@ -980,6 +1011,10 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
|
||||
},
|
||||
};
|
||||
|
||||
// gEngine.Con_Printf(" g%d: v(%#x %d %#x) V%d i(%#x %d %#x) I%d\n", i,
|
||||
// vertex_offset*sizeof(vk_vertex_t), mg->vertex_count * sizeof(vk_vertex_t), (vertex_offset + mg->vertex_count) * sizeof(vk_vertex_t), mg->vertex_count,
|
||||
// index_offset*sizeof(uint16_t), mg->element_count * sizeof(uint16_t), (index_offset + mg->element_count) * sizeof(uint16_t), mg->element_count);
|
||||
|
||||
geom_build_ranges[i] = (VkAccelerationStructureBuildRangeInfoKHR) {
|
||||
.primitiveCount = prim_count,
|
||||
};
|
||||
@ -1040,6 +1075,7 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
|
||||
g_rtx.frame.scratch_offset = 0;
|
||||
}
|
||||
|
||||
Mem_Free(geom_build_ranges_ptr);
|
||||
Mem_Free(geom_build_ranges);
|
||||
Mem_Free(geom_max_prim_counts);
|
||||
Mem_Free(geoms);
|
||||
@ -1053,6 +1089,8 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
|
||||
}
|
||||
}
|
||||
|
||||
// gEngine.Con_Reportf("Model %s generated AS=%p blas_index=%d\n", args.model->debug_name, args.model->rtx.blas, blas_index);
|
||||
|
||||
if (blas_index == ARRAYSIZE(g_rtx.blases))
|
||||
gEngine.Con_Printf(S_WARN "Too many BLASes created :(\n");
|
||||
}
|
||||
@ -1063,6 +1101,8 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
|
||||
}
|
||||
|
||||
void VK_RayModelDestroy( struct vk_render_model_s *model ) {
|
||||
ASSERT(!g_rtx.freeze_models);
|
||||
|
||||
ASSERT(vk_core.rtx);
|
||||
if (model->rtx.blas != VK_NULL_HANDLE) {
|
||||
int blas_index;
|
||||
@ -1075,6 +1115,8 @@ void VK_RayModelDestroy( struct vk_render_model_s *model ) {
|
||||
if (blas_index == ARRAYSIZE(g_rtx.blases))
|
||||
gEngine.Con_Printf(S_WARN "Model BLAS was missing\n");
|
||||
|
||||
// gEngine.Con_Reportf("Model %s destroying AS=%p blas_index=%d\n", model->debug_name, model->rtx.blas, blas_index);
|
||||
|
||||
vkDestroyAccelerationStructureKHR(vk_core.device, model->rtx.blas, NULL);
|
||||
model->rtx.blas = VK_NULL_HANDLE;
|
||||
}
|
||||
@ -1085,6 +1127,9 @@ void VK_RayFrameAddModel( const struct vk_render_model_s *model, const matrix3x4
|
||||
|
||||
ASSERT(g_rtx.frame.num_models <= ARRAYSIZE(g_rtx.frame.models));
|
||||
|
||||
if (g_rtx.freeze_models)
|
||||
return;
|
||||
|
||||
if (g_rtx.frame.num_models == ARRAYSIZE(g_rtx.frame.models)) {
|
||||
gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n");
|
||||
return;
|
||||
|
Loading…
Reference in New Issue
Block a user