Upload only dirty light cluster regions, PR #418

This ends up uploading only a few megabytes per frame, spread across a hundred or so ranges on average.
That is still not great: validation is now too slow. Otherwise, it's back to ~60 fps.

I think this is the end of the line for this approach. Even better light clusters would need a complete overhaul, e.g. moving them entirely to GPU compute.

Fixes #385
This commit is contained in:
Ivan Avdeev 2023-02-07 09:26:43 -08:00 committed by GitHub
commit 3edcb7c007
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 113 additions and 43 deletions

View File

@ -1,6 +1,6 @@
layout (set = 0, binding = BINDING_LIGHTS) readonly buffer SBOLights { LightsMetadata m; } lights;
layout (set = 0, binding = BINDING_LIGHT_CLUSTERS, align = 1) readonly buffer UBOLightClusters {
ivec3 grid_min, grid_size;
//ivec3 grid_min, grid_size;
//uint8_t clusters_data[MAX_LIGHT_CLUSTERS * LIGHT_CLUSTER_SIZE + HACK_OFFSET];
LightCluster clusters_[MAX_LIGHT_CLUSTERS];
} light_grid;
@ -25,11 +25,11 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
diffuse = specular = vec3(0.);
//diffuse = vec3(1.);//float(lights.m.num_point_lights) / 64.);
//#define USE_CLUSTERS
#define USE_CLUSTERS
#ifdef USE_CLUSTERS
const uint num_point_lights = uint(light_grid.clusters[cluster_index].num_point_lights);
const uint num_point_lights = uint(light_grid.clusters_[cluster_index].num_point_lights);
for (uint j = 0; j < num_point_lights; ++j) {
const uint i = uint(light_grid.clusters[cluster_index].point_lights[j]);
const uint i = uint(light_grid.clusters_[cluster_index].point_lights[j]);
#else
for (uint i = 0; i < lights.m.num_point_lights; ++i) {
#endif
@ -116,11 +116,11 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
void computeLighting(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, MaterialProperties material, out vec3 diffuse, out vec3 specular) {
diffuse = specular = vec3(0.);
const ivec3 light_cell = ivec3(floor(P / LIGHT_GRID_CELL_SIZE)) - light_grid.grid_min;
const uint cluster_index = uint(dot(light_cell, ivec3(1, light_grid.grid_size.x, light_grid.grid_size.x * light_grid.grid_size.y)));
const ivec3 light_cell = ivec3(floor(P / LIGHT_GRID_CELL_SIZE)) - lights.m.grid_min_cell;
const uint cluster_index = uint(dot(light_cell, ivec3(1, lights.m.grid_size.x, lights.m.grid_size.x * lights.m.grid_size.y)));
#ifdef USE_CLUSTERS
if (any(greaterThanEqual(light_cell, light_grid.grid_size)) || cluster_index >= MAX_LIGHT_CLUSTERS)
if (any(greaterThanEqual(light_cell, lights.m.grid_size)) || cluster_index >= MAX_LIGHT_CLUSTERS)
return; // throughput * vec3(1., 0., 0.);
#endif

View File

@ -191,12 +191,12 @@ void sampleSinglePolygonLight(in vec3 P, in vec3 N, in vec3 view_dir, in SampleC
#if 0
// Sample random one
void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, MaterialProperties material, uint cluster_index, inout vec3 diffuse, inout vec3 specular) {
const uint num_polygons = uint(light_grid.clusters[cluster_index].num_polygons);
const uint num_polygons = uint(light_grid.clusters_[cluster_index].num_polygons);
if (num_polygons == 0)
return;
const uint selected = uint(light_grid.clusters[cluster_index].polygons[rand_range(num_polygons)]);
const uint selected = uint(light_grid.clusters_[cluster_index].polygons[rand_range(num_polygons)]);
const PolygonLight poly = lights.m.polygons[selected];
const SampleContext ctx = buildSampleContext(P, N, view_dir);
@ -212,11 +212,11 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
#if DO_ALL_IN_CLUSTER
const SampleContext ctx = buildSampleContext(P, N, view_dir);
//#define USE_CLUSTERS
#define USE_CLUSTERS
#ifdef USE_CLUSTERS
const uint num_polygons = uint(light_grid.clusters[cluster_index].num_polygons);
const uint num_polygons = uint(light_grid.clusters_[cluster_index].num_polygons);
for (uint i = 0; i < num_polygons; ++i) {
const uint index = uint(light_grid.clusters[cluster_index].polygons[i]);
const uint index = uint(light_grid.clusters_[cluster_index].polygons[i]);
#else
for (uint index = 0; index < lights.m.num_polygons; ++index) {
#endif
@ -257,7 +257,7 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
#ifdef USE_CLUSTERS
// TODO move this to pickPolygonLight function
const uint num_polygons = uint(light_grid.clusters[cluster_index].num_polygons);
const uint num_polygons = uint(light_grid.clusters_[cluster_index].num_polygons);
#else
const uint num_polygons = lights.m.num_polygons;
#endif
@ -267,7 +267,7 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
float eps1 = rand01();
for (uint i = 0; i < num_polygons; ++i) {
#ifdef USE_CLUSTERS
const uint index = uint(light_grid.clusters[cluster_index].polygons[i]);
const uint index = uint(light_grid.clusters_[cluster_index].polygons[i]);
#else
const uint index = i;
#endif

View File

@ -21,6 +21,7 @@
#define vec3 vec3_t
#define vec4 vec4_t
#define mat4 matrix4x4
typedef int ivec3[3];
#define TOKENPASTE(x, y) x ## y
#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
#define PAD(x) float TOKENPASTE2(pad_, __LINE__)[x];
@ -111,6 +112,10 @@ struct LightsMetadata {
uint num_polygons;
uint num_point_lights;
PAD(2)
ivec3 grid_min_cell;
PAD(1)
ivec3 grid_size;
PAD(1)
STRUCT PointLight point_lights[MAX_POINT_LIGHTS];
STRUCT PolygonLight polygons[MAX_EMISSIVE_KUSOCHKI];
vec4 polygon_vertices[MAX_EMISSIVE_KUSOCHKI * 7]; // vec3 but aligned

View File

@ -11,6 +11,9 @@
#include "vk_staging.h"
#include "vk_commandpool.h"
#include "vk_light.h" // For stats
#include "shaders/ray_interop.h" // stats: struct LightCluster
#include "profiler.h"
#include "eiface.h" // ARRAYSIZE
@ -182,8 +185,13 @@ static void updateGamma( void ) {
}
}
// FIXME move this to r print speeds or something like that
// FIXME move this to r_speeds or something like that
static void showProfilingData( void ) {
{
const int dirty = g_lights.stats.dirty_cells;
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
}
gEngine.Con_NPrintf(5, "Perf scopes:");
for (int i = 0; i < g_aprof.num_scopes; ++i) {
const aprof_scope_t *const scope = g_aprof.scopes + i;

View File

@ -38,16 +38,6 @@ typedef struct {
qboolean set;
} vk_emissive_texture_t;
typedef struct {
int min_cell[4], size[3]; // 4th element is padding
struct LightCluster cells[MAX_LIGHT_CLUSTERS];
} vk_ray_shader_light_grid_t;
struct Lights {
struct LightsMetadata metadata;
vk_ray_shader_light_grid_t grid;
};
static struct {
struct {
vk_emissive_texture_t emissive_textures[MAX_TEXTURES];
@ -72,6 +62,7 @@ static struct {
bit_array_t visited_cells;
uint32_t frame_sequence;
} g_lights_;
static struct {
@ -95,7 +86,9 @@ qboolean VK_LightsInit( void ) {
gEngine.Cmd_AddCommand("vk_lights_dump", debugDumpLights, "Dump all light sources for next frame");
if (!VK_BufferCreate("rt lights buffer", &g_lights_.buffer, sizeof(struct Lights),
const int buffer_size = sizeof(struct LightsMetadata) + sizeof(struct LightCluster) * MAX_LIGHT_CLUSTERS;
if (!VK_BufferCreate("rt lights buffer", &g_lights_.buffer, buffer_size,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
// FIXME complain, handle
@ -536,6 +529,7 @@ void RT_LightsNewMapBegin( const struct model_s *map ) {
vk_lights_cell_t *const cell = g_lights.cells + i;
cell->num_point_lights = cell->num_static.point_lights = 0;
cell->num_polygons = cell->num_static.polygons = 0;
cell->frame_sequence = g_lights_.frame_sequence;
}
}
}
@ -545,6 +539,8 @@ void RT_LightsFrameBegin( void ) {
g_lights_.num_point_lights = g_lights_.num_static.point_lights;
g_lights_.num_polygon_vertices = g_lights_.num_static.polygon_vertices;
g_lights.stats.dirty_cells = 0;
for (int i = 0; i < g_lights.map.grid_cells; ++i) {
vk_lights_cell_t *const cell = g_lights.cells + i;
cell->num_polygons = cell->num_static.polygons;
@ -564,6 +560,10 @@ static qboolean addSurfaceLightToCell( int cell_index, int polygon_light_index )
}
cluster->polygons[cluster->num_polygons++] = polygon_light_index;
if (cluster->frame_sequence != g_lights_.frame_sequence) {
++g_lights.stats.dirty_cells;
cluster->frame_sequence = g_lights_.frame_sequence;
}
return true;
}
@ -578,6 +578,11 @@ static qboolean addLightToCell( int cell_index, int light_index ) {
}
cluster->point_lights[cluster->num_point_lights++] = light_index;
if (cluster->frame_sequence != g_lights_.frame_sequence) {
++g_lights.stats.dirty_cells;
cluster->frame_sequence = g_lights_.frame_sequence;
}
return true;
}
@ -943,6 +948,8 @@ void RT_LightsNewMapEnd( const struct model_s *map ) {
cell->num_static.polygons = cell->num_polygons;
}
}
g_lights.stats.dirty_cells = g_lights.map.grid_cells;
}
qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id ) {
@ -1124,21 +1131,59 @@ int RT_LightAddPolygon(const rt_light_add_polygon_t *addpoly) {
}
}
static void uploadGrid( vk_ray_shader_light_grid_t *grid ) {
ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);
static void uploadGridRange( int begin, int end ) {
const int count = end - begin;
ASSERT( count > 0 );
VectorCopy(g_lights.map.grid_min_cell, grid->min_cell);
VectorCopy(g_lights.map.grid_size, grid->size);
const int size = count * sizeof(struct LightCluster);
const vk_staging_region_t locked = R_VkStagingLockForBuffer( (vk_staging_buffer_args_t) {
.buffer = g_lights_.buffer.buffer,
.offset = sizeof(struct LightsMetadata) + begin * sizeof(struct LightCluster),
.size = size,
.alignment = 16, // WHY?
} );
for (int i = 0; i < g_lights.map.grid_cells; ++i) {
const vk_lights_cell_t *const src = g_lights.cells + i;
struct LightCluster *const dst = grid->cells + i;
ASSERT(locked.ptr);
struct LightCluster *const grid = locked.ptr;
memset(grid, 0, size);
for (int i = 0; i < count; ++i) {
const vk_lights_cell_t *const src = g_lights.cells + i + begin;
struct LightCluster *const dst = grid + i;
dst->num_point_lights = src->num_point_lights;
dst->num_polygons = src->num_polygons;
memcpy(dst->point_lights, src->point_lights, sizeof(uint8_t) * src->num_point_lights);
memcpy(dst->polygons, src->polygons, sizeof(uint8_t) * src->num_polygons);
}
R_VkStagingUnlock( locked.handle );
g_lights.stats.ranges_uploaded++;
}
// Stage all light cells marked dirty for the current frame for upload into the lights buffer.
// A cell counts as dirty when its frame_sequence equals g_lights_.frame_sequence.
// Runs of consecutive dirty cells are merged so that each run costs a single
// uploadGridRange() call; stats.ranges_uploaded is reset here and then counts those calls.
static void uploadGrid( void ) {
	ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);

	g_lights.stats.ranges_uploaded = 0;

	const int cells_count = g_lights.map.grid_cells;
	int range_start = -1; // negative: no dirty run is currently open

	for (int index = 0; index < cells_count; ++index) {
		const qboolean is_dirty = g_lights.cells[index].frame_sequence == g_lights_.frame_sequence;

		if (is_dirty) {
			// Open a new run on the first dirty cell; otherwise just extend the current one
			if (range_start < 0)
				range_start = index;
			continue;
		}

		// A clean cell terminates the open run, if any: flush [range_start, index)
		if (range_start >= 0) {
			uploadGridRange(range_start, index);
			range_start = -1;
		}
	}

	// Flush the run that extends to the very last cell
	if (range_start >= 0)
		uploadGridRange(range_start, cells_count);
}
static void uploadPolygonLights( struct LightsMetadata *metadata ) {
@ -1189,26 +1234,31 @@ static void uploadPointLights( struct LightsMetadata *metadata ) {
}
}
vk_lights_bindings_t VK_LightsUpload( VkCommandBuffer cmdbuf ) {
vk_lights_bindings_t VK_LightsUpload( void ) {
const vk_staging_region_t locked = R_VkStagingLockForBuffer( (vk_staging_buffer_args_t) {
.buffer = g_lights_.buffer.buffer,
.offset = 0,
.size = sizeof(struct LightsMetadata),
.alignment = 16,
.alignment = 16, // WHY?
} );
ASSERT(locked.ptr);
struct LightsMetadata *metadata = locked.ptr;
memset(metadata, 0, sizeof(*metadata));
VectorCopy(g_lights.map.grid_min_cell, metadata->grid_min_cell);
VectorCopy(g_lights.map.grid_size, metadata->grid_size);
uploadPolygonLights( metadata );
uploadPointLights( metadata );
// FIXME uploadGrid( &lights->grid );
R_VkStagingUnlock( locked.handle );
uploadGrid();
g_lights_.frame_sequence++;
return (vk_lights_bindings_t){
.buffer = g_lights_.buffer.buffer,
.metadata = {
@ -1217,7 +1267,7 @@ vk_lights_bindings_t VK_LightsUpload( VkCommandBuffer cmdbuf ) {
},
.grid = {
.offset = sizeof(struct LightsMetadata),
.size = sizeof(vk_ray_shader_light_grid_t),
.size = sizeof(struct LightCluster) * MAX_LIGHT_CLUSTERS,
},
};
}

View File

@ -16,6 +16,8 @@ typedef struct {
uint8_t point_lights;
uint8_t polygons;
} num_static;
uint32_t frame_sequence;
} vk_lights_cell_t;
typedef struct {
@ -57,6 +59,11 @@ typedef struct {
} map;
vk_lights_cell_t cells[MAX_LIGHT_CLUSTERS];
struct {
int dirty_cells;
int ranges_uploaded;
} stats;
} vk_lights_t;
extern vk_lights_t g_lights;
@ -77,7 +84,7 @@ typedef struct {
uint32_t offset, size;
} metadata, grid;
} vk_lights_bindings_t;
vk_lights_bindings_t VK_LightsUpload( VkCommandBuffer );
vk_lights_bindings_t VK_LightsUpload( void );
qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id );

View File

@ -47,7 +47,7 @@
X(Buffer, indices) \
X(Buffer, vertices) \
X(Buffer, lights) \
X(Buffer, light_clusters) \
X(Buffer, light_grid) \
X(Texture, textures) \
X(Texture, skybox)
@ -219,7 +219,7 @@ static void performTracing(VkCommandBuffer cmdbuf, const perform_tracing_args_t*
// TODO move this to lights
RES_SET_BUFFER(lights, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->metadata.offset, args->light_bindings->metadata.size);
RES_SET_BUFFER(light_clusters, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->grid.offset, args->light_bindings->grid.size);
RES_SET_BUFFER(light_grid, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->grid.offset, args->light_bindings->grid.size);
#undef RES_SET_SBUFFER_FULL
#undef RES_SET_BUFFER
@ -547,7 +547,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
// FIXME pass these matrices explicitly to let RTX module handle ubo itself
RT_LightsFrameEnd();
const vk_lights_bindings_t light_bindings = VK_LightsUpload(cmdbuf);
const vk_lights_bindings_t light_bindings = VK_LightsUpload();
g_rtx.frame_number++;