rtx: read kusochki data, add dynamic lights back

also improve fps (frame rate)
This commit is contained in:
Ivan 'provod' Avdeev 2021-04-10 12:46:37 -07:00
parent 573c291cef
commit 0407f12638
5 changed files with 94 additions and 75 deletions

View File

@ -1,23 +1,18 @@
## 2021-04-10
- [x] rtx: fix tlas rebuild
- [ ] rtx: upload kusochki metadata w/ leaves
# Next
- [ ] rtx: fix blas destruction on exit
- [ ] rtx: use light visibility data
- [ ] sometimes we get uninitialized models
- [ ] run under asan
- [ ] rtx: fix blas destruction on exit
- [ ] rtx: read rad file data
- [ ] rtx: hack: interpret textures with '~' or '{' as emissive
- [ ] rtx: emissive particles
- [ ] rtx: textures
- [ ] rtx: add fps
- [ ] rtx: don't group brush draws by texture
-- this was tried and failed: it increases the BLAS count to ~3000 and halves fps
- [ ] better AS structure (fewer blases, etc)
- [ ] rasterize into G-buffer, and only then compute lighting with rtx
- [ ] rtx: better random
- [ ] rtx: some studio models have glitchy geometry
# Planned
- [ ] rtx: add fps: rasterize into G-buffer, and only then compute lighting with rtx
- [ ] bake light visibility in compute shader
- [ ] rtx: cull light sources (dlights and light textures) using bsp
- [ ] enable entity-parsed lights by lightstyles
@ -166,3 +161,10 @@
## 2021-04-09
- [x] rtx: build AS for model
- [x] rtx: include pre-built models in TLAS
## 2021-04-10
- [x] rtx: fix tlas rebuild
- [x] rtx: upload kusochki metadata ~~w/ leaves~~
- [x] rtx: add fps
- [x] rtx: don't group brush draws by texture
- [x] better AS structure (fewer blases, etc)

View File

@ -41,6 +41,8 @@ void printInt(in float num, in vec2 pg, in vec2 pc, inout float lx, inout float
} else {
PUTC(diGlyph(mod(floor(num/1000.),10.)));
}*/
if (num >= 100000.) { PUTC(diGlyph(mod(floor(num/100000.),10.))); }
if (num >= 10000.) { PUTC(diGlyph(mod(floor(num/10000.),10.))); }
if (num >= 1000.) { PUTC(diGlyph(mod(floor(num/1000.),10.))); }
if (num >= 100.) { PUTC(diGlyph(mod(floor(num/100.),10.))); }
if (num >= 10.) { PUTC(diGlyph(mod(floor(num/10.),10.))); }
@ -63,7 +65,7 @@ struct Kusok {
uint index_offset;
uint vertex_offset;
uint triangles;
vec4 emissive;
//vec4 emissive;
};
struct Vertex {
@ -108,11 +110,17 @@ float printText(in vec2 p) {
float col = 0.;
#define PUTN(n) printInt(n,pg,pc,lx,col)
if (pc.y == 0.) {
PUTC(C_N); PUTC(0.); PUTN(float(num_lighttextures));
} else if (pc.y <= float(num_lighttextures)) {
PUTN((pc.y-1.)); PUTC(0.); PUTN(float(lighttextures[int(pc.y-1.)]));
}
// if (pc.y == 0.) {
// PUTC(C_N); PUTC(0.); PUTN(float(num_lighttextures));
// } else if (pc.y <= float(num_lighttextures)) {
// PUTN((pc.y-1.)); PUTC(0.); PUTN(float(lighttextures[int(pc.y-1.)]));
// }
if (pc.y < 0.)
return 0.;
const int idx = int(pc.y);
const Kusok kusok = kusochki[idx];
PUTN(idx); PUTC(0.); PUTN(kusok.index_offset); PUTC(0.); PUTN(kusok.vertex_offset); PUTC(0.); PUTN(kusok.triangles);
return col;
}
@ -182,20 +190,22 @@ void main() {
vec3 pos = O+D*l;
C = fract(pos / 100.);
break;
const int instance_kusochki_offset = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true);
//const int instance_index = rayQueryGetIntersectionInstanceIdEXT(rayQuery, true);
const int instance_index = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true);
const int geom_index = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true);
const int kusok_index = instance_kusochki_offset + geom_index;
//C = fract(pos / 100.);
//C = vec3(hash(float(geom_index)), hash(float(geom_index)+15.43), hash(float(geom_index)+34.));
//break;
//rayQueryGetIntersectionGeometryIndexEXT
const int prim_index = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true);
const mat4x3 transform = rayQueryGetIntersectionObjectToWorldEXT(rayQuery, true);
const uint first_index_offset = kusochki[instance_index].index_offset + prim_index * 3;
const uint vi1 = uint(indices[first_index_offset+0]) + kusochki[instance_index].vertex_offset;
const uint vi2 = uint(indices[first_index_offset+1]) + kusochki[instance_index].vertex_offset;
const uint vi3 = uint(indices[first_index_offset+2]) + kusochki[instance_index].vertex_offset;
const uint first_index_offset = kusochki[kusok_index].index_offset + prim_index * 3;
const uint vi1 = uint(indices[first_index_offset+0]) + kusochki[kusok_index].vertex_offset;
const uint vi2 = uint(indices[first_index_offset+1]) + kusochki[kusok_index].vertex_offset;
const uint vi3 = uint(indices[first_index_offset+2]) + kusochki[kusok_index].vertex_offset;
const vec3 n1 = vertices[vi1].normal;
const vec3 n2 = vertices[vi2].normal;
const vec3 n3 = vertices[vi3].normal;
@ -203,15 +213,19 @@ void main() {
const vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, true);
const vec3 normal = normalize(transpose(inverse(mat3(transform))) * (n1 * (1. - bary.x - bary.y) + n2 * bary.x + n3 * bary.y));
//C = normal * .5 + .5; break;
// TODO read from texture
const vec3 baseColor = vec3(1.);
// FIXME upload this data
#if 0
for (uint i = 0; i < num_lighttextures; ++i) {
const uint kusok_index = lighttextures[i];
const uint kusok_index_light = lighttextures[i];
const Kusok kusok = kusochki[kusok_index];
if (kusok_index == instance_index) {
if (kusok_index_light == kusok_index) {
// TODO do we need to do this when we have textures?
C += kc * vec3(hash(float(instance_index)), hash(float(instance_index)+15.43), hash(float(instance_index)+34.));//kusok.emissive.rgb;
C += kc * vec3(hash(float(kusok_index)), hash(float(kusok_index)+15.43), hash(float(kusok_index)+34.));//kusok.emissive.rgb;
continue;
}
@ -250,13 +264,14 @@ void main() {
// TODO
const float brightness_fudge = 1000.;
const vec3 emissive = kusochki[kusok_index].emissive.rgb;
C += brightness_fudge * kc * baseColor * emissive * dot(light_dir, normal) / (light_dist * light_dist);
//const vec3 emissive = kusochki[kusok_index].emissive.rgb;
//C += brightness_fudge * kc * baseColor * emissive * dot(light_dir, normal) / (light_dist * light_dist);
// Sample just one triangle
break;
}
}
#endif
//rand01_state = fract((pos.x + pos.y + pos.z)/100.) + uv.x + uv.y + pc.t;
for (uint i = 0; i < num_lights; ++i) {
@ -291,10 +306,10 @@ void main() {
C += kc * baseColor.rgb * light_color * dot_ld_norm * attenuation;
} // for all lights
const Kusok kusok = kusochki[instance_index];
if (any(greaterThan(kusok.emissive.rgb, vec3(0.)))) {
C += kc * vec3(hash(float(instance_index)-102.3), hash(float(instance_index)+15.43), hash(float(instance_index)+34.));//kusok.emissive.rgb;
}
//const Kusok kusok = kusochki[kusok_index];
// if (any(greaterThan(kusok.emissive.rgb, vec3(0.)))) {
// C += kc * vec3(hash(float(kusok_index)-102.3), hash(float(kusok_index)+15.43), hash(float(kusok_index)+34.));//kusok.emissive.rgb;
//}
kc *= .9;
const float rough = .4;
@ -307,6 +322,7 @@ void main() {
));
} // for all bounces
//C = mix(C, vec3(1.), printText(vec2(1.,-1.) * vec2(gl_GlobalInvocationID.xy) + vec2(0., imageSize(image).y)));
//C = mix(C, vec3(1.), printText(vec2(1.,-1.) * vec2(gl_GlobalInvocationID.xy) + vec2(0., imageSize(image).y)));
//if (gl_GlobalInvocationID.x > imageSize(image).x / 2)

View File

@ -622,7 +622,7 @@ static uint32_t writeDlightsToUBO( void )
ubo_lights = (vk_ubo_lights_t*)((byte*)(g_render.uniform_buffer.mapped) + ubo_lights_offset);
// TODO rtx and query light styles
#if 0
#if 1
for (int i = 0; i < g_render.num_static_lights && num_lights < ARRAYSIZE(ubo_lights->light); ++i) {
Vector4Set(
ubo_lights->light[num_lights].color,

View File

@ -74,6 +74,7 @@ typedef struct {
uint32_t element_count;
uint32_t index_offset, vertex_offset;
uint32_t vertex_count;
// TODO potentially dynamic int light_cluster;
} vk_render_geometry_t;
typedef struct vk_render_model_s {
@ -90,6 +91,7 @@ typedef struct vk_render_model_s {
struct {
VkAccelerationStructureKHR blas;
uint32_t kusochki_offset;
} rtx;
} vk_render_model_t;

View File

@ -15,6 +15,7 @@
#include <string.h>
#define MAX_ACCELS 1024
#define MAX_KUSOCHKI 8192
#define MAX_SCRATCH_BUFFER (16*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
#define MAX_LIGHT_TEXTURES 256
@ -39,8 +40,9 @@ typedef struct {
uint32_t index_offset;
uint32_t vertex_offset;
uint32_t triangles;
float sad_padding_[1];
vec4_t emissive;
//uint32_t leaf;
//float sad_padding_[1];
//vec4_t emissive;
} vk_kusok_data_t;
typedef struct {
@ -56,6 +58,7 @@ typedef struct {
typedef struct {
matrix3x4 transform_row;
VkAccelerationStructureKHR accel;
uint32_t kusochki_offset;
} vk_ray_model_t;
typedef struct {
@ -86,6 +89,7 @@ static struct {
// Data that is alive longer than one frame, usually within one map
struct {
uint32_t buffer_offset;
int num_kusochki;
} map;
// Per-frame data that is accumulated between RayFrameBegin and End calls
@ -200,6 +204,7 @@ void VK_RayNewMap( void ) {
ASSERT(vk_core.rtx);
g_rtx.map.buffer_offset = 0;
g_rtx.map.num_kusochki = 0;
}
void VK_RayFrameBegin( void )
@ -335,7 +340,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
const vk_ray_model_t* const model = g_rtx.frame.models + i;
ASSERT(model->accel != VK_NULL_HANDLE);
inst[i] = (VkAccelerationStructureInstanceKHR){
.instanceCustomIndex = i,
.instanceCustomIndex = model->kusochki_offset,
.mask = 0xff,
.instanceShaderBindingTableRecordOffset = 0,
.flags = 0,
@ -761,7 +766,7 @@ qboolean VK_RayInit( void )
return false;
}
if (!createBuffer(&g_rtx.kusochki_buffer, sizeof(vk_kusok_data_t) * MAX_ACCELS,
if (!createBuffer(&g_rtx.kusochki_buffer, sizeof(vk_kusok_data_t) * MAX_KUSOCHKI,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT /* | VK_BUFFER_USAGE_TRANSFER_DST_BIT */,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// FIXME: report the kusochki buffer creation failure and handle it
@ -854,18 +859,35 @@ void VK_RayShutdown( void )
}
qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
VkAccelerationStructureGeometryKHR *geoms = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms));
uint32_t *geom_max_prim_counts = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_max_prim_counts));
VkAccelerationStructureBuildRangeInfoKHR *geom_build_ranges = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
VkAccelerationStructureBuildRangeInfoKHR **geom_build_ranges_ptr = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
VkAccelerationStructureGeometryKHR *geoms;
uint32_t *geom_max_prim_counts;
VkAccelerationStructureBuildRangeInfoKHR *geom_build_ranges;
VkAccelerationStructureBuildRangeInfoKHR **geom_build_ranges_ptr;
const VkDeviceAddress buffer_addr = getBufferDeviceAddress(args.buffer);
vk_kusok_data_t *kusochki;
qboolean result;
ASSERT(vk_core.rtx);
ASSERT(g_rtx.map.num_kusochki <= MAX_KUSOCHKI);
if (g_rtx.map.num_kusochki == MAX_KUSOCHKI) {
gEngine.Con_Printf(S_ERROR "Maximum number of kusochki exceeded\n");
return false;
}
geoms = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geoms));
geom_max_prim_counts = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_max_prim_counts));
geom_build_ranges = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
geom_build_ranges_ptr = Mem_Malloc(vk_core.pool, args.model->num_geometries * sizeof(*geom_build_ranges));
kusochki = (vk_kusok_data_t*)(g_rtx.kusochki_buffer.mapped) + g_rtx.map.num_kusochki;
args.model->rtx.kusochki_offset = g_rtx.map.num_kusochki;
for (int i = 0; i < args.model->num_geometries; ++i) {
const vk_render_geometry_t *mg = args.model->geometries + i;
const uint32_t prim_count = mg->element_count / 3;
const uint32_t vertex_offset = args.vertex_offset + mg->vertex_offset;
const uint32_t index_offset = args.index_offset + mg->index_offset;
geom_max_prim_counts[i] = prim_count;
geoms[i] = (VkAccelerationStructureGeometryKHR)
@ -880,8 +902,8 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
.maxVertex = mg->vertex_count,
.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT,
.vertexStride = sizeof(vk_vertex_t),
.vertexData.deviceAddress = buffer_addr + (args.vertex_offset + mg->vertex_offset) * sizeof(vk_vertex_t),
.indexData.deviceAddress = buffer_addr + (args.index_offset + mg->index_offset) * sizeof(uint16_t),
.vertexData.deviceAddress = buffer_addr + vertex_offset * sizeof(vk_vertex_t),
.indexData.deviceAddress = buffer_addr + index_offset * sizeof(uint16_t),
},
};
@ -890,37 +912,9 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
};
geom_build_ranges_ptr[i] = geom_build_ranges + i;
// Store geometry references in kusochki
// FIXME
#if 0
{
vk_kusok_data_t *kusok = (vk_kusok_data_t*)(g_rtx.kusochki_buffer.mapped) + g_rtx_scene.num_models;
kusok->vertex_offset = dynamic->vertex_offset;
kusok->index_offset = dynamic->index_offset;
ASSERT(dynamic->element_count % 3 == 0);
kusok->triangles = dynamic->element_count / 3;
ASSERT(dynamic->texture_id < MAX_TEXTURES);
if (dynamic->texture_id >= 0 && g_emissive_texture_table[dynamic->texture_id].set) {
VectorCopy(g_emissive_texture_table[dynamic->texture_id].emissive, kusok->emissive);
} else {
kusok->emissive[0] = dynamic->emissive.r;
kusok->emissive[1] = dynamic->emissive.g;
kusok->emissive[2] = dynamic->emissive.b;
}
if (kusok->emissive[0] > 0 || kusok->emissive[1] > 0 || kusok->emissive[2] > 0) {
if (g_rtx_scene.num_lighttextures < MAX_LIGHT_TEXTURES) {
vk_lighttexture_data_t *ltd = (vk_lighttexture_data_t*)g_rtx.lighttextures_buffer.mapped;
ltd->lighttexture[g_rtx_scene.num_lighttextures].kusok_index = g_rtx_scene.num_models;
g_rtx_scene.num_lighttextures++;
ltd->num_lighttextures = g_rtx_scene.num_lighttextures;
} else {
gEngine.Con_Printf(S_ERROR "Ran out of light textures space");
}
}
}
#endif
kusochki[i].vertex_offset = vertex_offset;
kusochki[i].index_offset = index_offset;
kusochki[i].triangles = prim_count;
}
{
@ -961,6 +955,10 @@ qboolean VK_RayModelInit( vk_ray_model_init_t args ) {
Mem_Free(geom_max_prim_counts);
Mem_Free(geoms);
if (result) {
g_rtx.map.num_kusochki += args.model->num_geometries;
}
return result;
}
@ -986,6 +984,7 @@ void VK_RayFrameAddModel( const struct vk_render_model_s *model, const matrix3x4
vk_ray_model_t* ray_model = g_rtx.frame.models + g_rtx.frame.num_models;
ASSERT(model->rtx.blas != VK_NULL_HANDLE);
ray_model->accel = model->rtx.blas;
ray_model->kusochki_offset = model->rtx.kusochki_offset;
memcpy(ray_model->transform_row, *transform_row, sizeof(ray_model->transform_row));
g_rtx.frame.num_models++;
}