xash3d-fwgs/ref_vk/vk_rtx.c
Ivan Avdeev 19b8025fc6 rtx: add light clusters debug helpers
1. add a way to enable only certain lights
2. add commented out code to highligh affected light clusters, culling
   modes, etc.
3. add affected light clusters counter when collecting lights
2021-10-13 12:27:24 -07:00

1086 lines
39 KiB
C

#include "vk_rtx.h"
#include "vk_core.h"
#include "vk_common.h"
#include "vk_buffer.h"
#include "vk_pipeline.h"
#include "vk_cvar.h"
#include "vk_textures.h"
#include "vk_light.h"
#include "vk_descriptor.h"
#include "vk_ray_internal.h"
#include "eiface.h"
#include "xash3d_mathlib.h"
#include <string.h>
#define MAX_SCRATCH_BUFFER (32*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
#define MAX_LIGHT_LEAVES 8192
enum {
ShaderBindingTable_RayGen,
ShaderBindingTable_Miss,
ShaderBindingTable_Miss_Shadow,
ShaderBindingTable_Hit,
ShaderBindingTable_HitWithAlphaMask,
ShaderBindingTable_COUNT
};
// TODO settings/realtime modifiable/adaptive
#define FRAME_WIDTH 1280
#define FRAME_HEIGHT 720
// TODO sync with shaders
// TODO optimal values
#define WG_W 16
#define WG_H 8
typedef struct {
vec3_t pos;
float radius;
vec3_t color;
float padding_;
} vk_light_t;
typedef struct PushConstants vk_rtx_push_constants_t;
typedef struct {
int min_cell[4], size[3]; // 4th element is padding
vk_lights_cell_t cells[MAX_LIGHT_CLUSTERS];
} vk_ray_shader_light_grid;
enum {
RayDescBinding_DestImage = 0,
RayDescBinding_TLAS = 1,
RayDescBinding_UBOMatrices = 2,
RayDescBinding_Kusochki = 3,
RayDescBinding_Indices = 4,
RayDescBinding_Vertices = 5,
RayDescBinding_Textures = 6,
RayDescBinding_Lights = 7,
RayDescBinding_LightClusters = 8,
RayDescBinding_PrevFrame = 9,
RayDescBinding_COUNT
};
static struct {
vk_descriptors_t descriptors;
VkDescriptorSetLayoutBinding desc_bindings[RayDescBinding_COUNT];
vk_descriptor_value_t desc_values[RayDescBinding_COUNT];
VkDescriptorSet desc_sets[1];
VkPipeline pipeline;
// Shader binding table buffer
vk_buffer_t sbt_buffer;
uint32_t sbt_record_size;
// Stores AS built data. Lifetime similar to render buffer:
// - some portion lives for entire map lifetime
// - some portion lives only for a single frame (may have several frames in flight)
// TODO: unify this with render buffer
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t accels_buffer;
vk_ring_buffer_t accels_buffer_alloc;
// Temp: lives only during a single frame (may have many in flight)
// Used for building ASes;
// Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT
vk_buffer_t scratch_buffer;
VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;
// Temp-ish: used for making TLAS, contains addressed to all used BLASes
// Lifetime and nature of usage similar to scratch_buffer
// TODO: unify them
// Needs: SHADER_DEVICE_ADDRESS, STORAGE_BUFFER, AS_BUILD_INPUT_READ_ONLY
vk_buffer_t tlas_geom_buffer;
// Planned to contain seveal types of data:
// - grid structure itself
// - lights data:
// - dlights (fully dynamic)
// - entity lights (can be dynamic with light styles)
// - surface lights (map geometry is static, however brush models can have them too and move around (e.g. wagonchik and elevators))
// Therefore, this is also dynamic and lifetime is per-frame
// TODO: unify with scratch buffer
// Needs: STORAGE_BUFFER
// Can be potentially crated using compute shader (would need shader write bit)
vk_buffer_t light_grid_buffer;
// TODO need several TLASes for N frames in flight
VkAccelerationStructureKHR tlas;
// Per-frame data that is accumulated between RayFrameBegin and End calls
struct {
uint32_t scratch_offset; // for building dynamic blases
} frame;
unsigned frame_number;
vk_image_t frames[2];
qboolean reload_pipeline;
qboolean reload_lighting;
} g_rtx = {0};
VkDeviceAddress getBufferDeviceAddress(VkBuffer buffer) {
const VkBufferDeviceAddressInfo bdai = {.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, .buffer = buffer};
return vkGetBufferDeviceAddress(vk_core.device, &bdai);
}
static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
VkAccelerationStructureDeviceAddressInfoKHR asdai = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
.accelerationStructure = as,
};
return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai);
}
// TODO split this into smaller building blocks in a separate module
qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_build_args_t *args, vk_ray_model_t *model) {
qboolean should_create = *args->p_accel == VK_NULL_HANDLE;
#if 1 // update does not work at all on AMD gpus
qboolean is_update = false; // FIXME this crashes for some reason !should_create && args->dynamic;
#else
qboolean is_update = !should_create && args->dynamic;
#endif
VkAccelerationStructureBuildGeometryInfoKHR build_info = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
.type = args->type,
.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR | ( args->dynamic ? VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR : 0),
.mode = is_update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
.geometryCount = args->n_geoms,
.pGeometries = args->geoms,
.srcAccelerationStructure = is_update ? *args->p_accel : VK_NULL_HANDLE,
};
VkAccelerationStructureBuildSizesInfoKHR build_size = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR
};
uint32_t scratch_buffer_size = 0;
ASSERT(args->geoms);
ASSERT(args->n_geoms > 0);
ASSERT(args->p_accel);
vkGetAccelerationStructureBuildSizesKHR(
vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info, args->max_prim_counts, &build_size);
scratch_buffer_size = is_update ? build_size.updateScratchSize : build_size.buildScratchSize;
#if 0
{
uint32_t max_prims = 0;
for (int i = 0; i < args->n_geoms; ++i)
max_prims += args->max_prim_counts[i];
gEngine.Con_Reportf(
"AS max_prims=%u, n_geoms=%u, build size: %d, scratch size: %d\n", max_prims, args->n_geoms, build_size.accelerationStructureSize, build_size.buildScratchSize);
}
#endif
if (MAX_SCRATCH_BUFFER < g_rtx.frame.scratch_offset + scratch_buffer_size) {
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
MAX_SCRATCH_BUFFER - g_rtx.frame.scratch_offset,
scratch_buffer_size);
return false;
}
if (should_create) {
const uint32_t as_size = build_size.accelerationStructureSize;
const uint32_t buffer_offset = VK_RingBuffer_Alloc(&g_rtx.accels_buffer_alloc, as_size, 256);
const VkAccelerationStructureCreateInfoKHR asci = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
.buffer = g_rtx.accels_buffer.buffer,
.offset = buffer_offset,
.type = args->type,
.size = as_size,
};
if (buffer_offset == AllocFailed) {
gEngine.Con_Printf(S_ERROR "Failed to allocated %u bytes for accel buffer\n", asci.size);
return false;
}
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, args->p_accel));
SET_DEBUG_NAME(*args->p_accel, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, args->debug_name);
if (model) {
model->size = asci.size;
model->debug.as_offset = buffer_offset;
}
// gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x\n", *args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size);
}
// If not enough data for building, just create
if (!cmdbuf || !args->build_ranges)
return true;
if (model) {
ASSERT(model->size >= build_size.accelerationStructureSize);
}
build_info.dstAccelerationStructure = *args->p_accel;
build_info.scratchData.deviceAddress = g_rtx.scratch_buffer_addr + g_rtx.frame.scratch_offset;
//uint32_t scratch_offset_initial = g_rtx.frame.scratch_offset;
g_rtx.frame.scratch_offset += scratch_buffer_size;
g_rtx.frame.scratch_offset = ALIGN_UP(g_rtx.frame.scratch_offset, vk_core.physical_device.properties_accel.minAccelerationStructureScratchOffsetAlignment);
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
vkCmdBuildAccelerationStructuresKHR(cmdbuf, 1, &build_info, &args->build_ranges);
return true;
}
static void createTlas( VkCommandBuffer cmdbuf ) {
const VkAccelerationStructureGeometryKHR tl_geom[] = {
{
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
//.flags = VK_GEOMETRY_OPAQUE_BIT,
.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR,
.geometry.instances =
(VkAccelerationStructureGeometryInstancesDataKHR){
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,
.data.deviceAddress = getBufferDeviceAddress(g_rtx.tlas_geom_buffer.buffer),
.arrayOfPointers = VK_FALSE,
},
},
};
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = { MAX_ACCELS }; //cmdbuf == VK_NULL_HANDLE ? MAX_ACCELS : g_ray_model_state.frame.num_models };
const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
.primitiveCount = g_ray_model_state.frame.num_models,
};
const as_build_args_t asrgs = {
.geoms = tl_geom,
.max_prim_counts = tl_max_prim_counts,
.build_ranges = cmdbuf == VK_NULL_HANDLE ? NULL : &tl_build_range,
.n_geoms = ARRAYSIZE(tl_geom),
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
// we can't really rebuild TLAS because instance count changes are not allowed .dynamic = true,
.dynamic = false,
.p_accel = &g_rtx.tlas,
.debug_name = "TLAS",
};
if (!createOrUpdateAccelerationStructure(cmdbuf, &asrgs, NULL)) {
gEngine.Host_Error("Could not create/update TLAS\n");
return;
}
}
void VK_RayNewMap( void ) {
ASSERT(vk_core.rtx);
VK_RingBuffer_Clear(&g_rtx.accels_buffer_alloc);
VK_RingBuffer_Clear(&g_ray_model_state.kusochki_alloc);
// Clear model cache
for (int i = 0; i < ARRAYSIZE(g_ray_model_state.models_cache); ++i) {
vk_ray_model_t *model = g_ray_model_state.models_cache + i;
VK_RayModelDestroy(model);
}
// Recreate tlas
// Why here and not in init: to make sure that its memory is preserved. Map init will clear all memory regions.
{
if (g_rtx.tlas != VK_NULL_HANDLE) {
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.tlas, NULL);
g_rtx.tlas = VK_NULL_HANDLE;
}
createTlas(VK_NULL_HANDLE);
}
}
void VK_RayMapLoadEnd( void ) {
VK_RingBuffer_Fix(&g_rtx.accels_buffer_alloc);
VK_RingBuffer_Fix(&g_ray_model_state.kusochki_alloc);
}
void VK_RayFrameBegin( void )
{
ASSERT(vk_core.rtx);
g_rtx.frame.scratch_offset = 0;
if (g_ray_model_state.freeze_models)
return;
XVK_RayModel_ClearForNextFrame();
if (g_rtx.reload_lighting) {
g_rtx.reload_lighting = false;
VK_LightsLoadMapStaticLights();
}
// TODO shouldn't we do this in freeze models mode anyway?
VK_LightsFrameInit();
}
static void createPipeline( void )
{
struct RayShaderSpec {
int max_point_lights;
int max_emissive_kusochki;
uint32_t max_visible_point_lights;
uint32_t max_visible_surface_lights;
float light_grid_cell_size;
int max_light_clusters;
} spec_data = {
.max_point_lights = MAX_POINT_LIGHTS,
.max_emissive_kusochki = MAX_EMISSIVE_KUSOCHKI,
.max_visible_point_lights = MAX_VISIBLE_POINT_LIGHTS,
.max_visible_surface_lights = MAX_VISIBLE_SURFACE_LIGHTS,
.light_grid_cell_size = LIGHT_GRID_CELL_SIZE,
.max_light_clusters = MAX_LIGHT_CLUSTERS,
};
const VkSpecializationMapEntry spec_map[] = {
{.constantID = 0, .offset = offsetof(struct RayShaderSpec, max_point_lights), .size = sizeof(int) },
{.constantID = 1, .offset = offsetof(struct RayShaderSpec, max_emissive_kusochki), .size = sizeof(int) },
{.constantID = 2, .offset = offsetof(struct RayShaderSpec, max_visible_point_lights), .size = sizeof(uint32_t) },
{.constantID = 3, .offset = offsetof(struct RayShaderSpec, max_visible_surface_lights), .size = sizeof(uint32_t) },
{.constantID = 4, .offset = offsetof(struct RayShaderSpec, light_grid_cell_size), .size = sizeof(float) },
{.constantID = 5, .offset = offsetof(struct RayShaderSpec, max_light_clusters), .size = sizeof(int) },
};
VkSpecializationInfo spec = {
.mapEntryCount = ARRAYSIZE(spec_map),
.pMapEntries = spec_map,
.dataSize = sizeof(spec_data),
.pData = &spec_data,
};
enum {
ShaderStageIndex_RayGen,
ShaderStageIndex_Miss,
ShaderStageIndex_Miss_Shadow,
ShaderStageIndex_ClosestHit,
ShaderStageIndex_AnyHit_AlphaMask,
ShaderStageIndex_COUNT,
};
#define DEFINE_SHADER(filename, bit, index) \
shaders[ShaderStageIndex_##index] = (VkPipelineShaderStageCreateInfo){ \
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, \
.stage = VK_SHADER_STAGE_##bit##_BIT_KHR, \
.module = loadShader(filename), \
.pName = "main", \
.pSpecializationInfo = &spec, \
}
VkPipelineShaderStageCreateInfo shaders[ShaderStageIndex_COUNT];
VkRayTracingShaderGroupCreateInfoKHR shader_groups[ShaderBindingTable_COUNT];
const VkRayTracingPipelineCreateInfoKHR rtpci = {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,
//TODO .flags = VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR ....
.stageCount = ARRAYSIZE(shaders),
.pStages = shaders,
.groupCount = ARRAYSIZE(shader_groups),
.pGroups = shader_groups,
.maxPipelineRayRecursionDepth = 1,
.layout = g_rtx.descriptors.pipeline_layout,
};
DEFINE_SHADER("ray.rgen.spv", RAYGEN, RayGen);
DEFINE_SHADER("ray.rmiss.spv", MISS, Miss);
DEFINE_SHADER("shadow.rmiss.spv", MISS, Miss_Shadow);
DEFINE_SHADER("ray.rchit.spv", CLOSEST_HIT, ClosestHit);
DEFINE_SHADER("alphamask.rahit.spv", ANY_HIT, AnyHit_AlphaMask);
shader_groups[ShaderBindingTable_RayGen] = (VkRayTracingShaderGroupCreateInfoKHR) {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
.anyHitShader = VK_SHADER_UNUSED_KHR,
.closestHitShader = VK_SHADER_UNUSED_KHR,
.generalShader = ShaderStageIndex_RayGen,
.intersectionShader = VK_SHADER_UNUSED_KHR,
};
shader_groups[ShaderBindingTable_Miss] = (VkRayTracingShaderGroupCreateInfoKHR) {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
.anyHitShader = VK_SHADER_UNUSED_KHR,
.closestHitShader = VK_SHADER_UNUSED_KHR,
.generalShader = ShaderStageIndex_Miss,
.intersectionShader = VK_SHADER_UNUSED_KHR,
};
shader_groups[ShaderBindingTable_Miss_Shadow] = (VkRayTracingShaderGroupCreateInfoKHR) {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
.anyHitShader = VK_SHADER_UNUSED_KHR,
.closestHitShader = VK_SHADER_UNUSED_KHR,
.generalShader = ShaderStageIndex_Miss_Shadow,
.intersectionShader = VK_SHADER_UNUSED_KHR,
};
shader_groups[ShaderBindingTable_Hit] = (VkRayTracingShaderGroupCreateInfoKHR) {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR,
.anyHitShader = VK_SHADER_UNUSED_KHR,
.closestHitShader = ShaderStageIndex_ClosestHit,
.generalShader = VK_SHADER_UNUSED_KHR,
.intersectionShader = VK_SHADER_UNUSED_KHR,
};
shader_groups[ShaderBindingTable_HitWithAlphaMask] = (VkRayTracingShaderGroupCreateInfoKHR) {
.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR,
.anyHitShader = ShaderStageIndex_AnyHit_AlphaMask,
.closestHitShader = ShaderStageIndex_ClosestHit,
.generalShader = VK_SHADER_UNUSED_KHR,
.intersectionShader = VK_SHADER_UNUSED_KHR,
};
XVK_CHECK(vkCreateRayTracingPipelinesKHR(vk_core.device, VK_NULL_HANDLE, g_pipeline_cache, 1, &rtpci, NULL, &g_rtx.pipeline));
ASSERT(g_rtx.pipeline != VK_NULL_HANDLE);
{
const uint32_t sbt_handle_size = vk_core.physical_device.properties_ray_tracing_pipeline.shaderGroupHandleSize;
const uint32_t sbt_handles_buffer_size = ARRAYSIZE(shader_groups) * sbt_handle_size;
uint8_t *sbt_handles = Mem_Malloc(vk_core.pool, sbt_handles_buffer_size);
XVK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(vk_core.device, g_rtx.pipeline, 0, ARRAYSIZE(shader_groups), sbt_handles_buffer_size, sbt_handles));
for (int i = 0; i < ARRAYSIZE(shader_groups); ++i)
{
uint8_t *sbt_dst = g_rtx.sbt_buffer.mapped;
memcpy(sbt_dst + g_rtx.sbt_record_size * i, sbt_handles + sbt_handle_size * i, sbt_handle_size);
}
Mem_Free(sbt_handles);
}
for (int i = 0; i < ARRAYSIZE(shaders); ++i)
vkDestroyShaderModule(vk_core.device, shaders[i].module, NULL);
}
static void prepareTlas( VkCommandBuffer cmdbuf ) {
ASSERT(g_ray_model_state.frame.num_models > 0);
// Upload all blas instances references to GPU mem
{
VkAccelerationStructureInstanceKHR* inst = g_rtx.tlas_geom_buffer.mapped;
for (int i = 0; i < g_ray_model_state.frame.num_models; ++i) {
const vk_ray_draw_model_t* const model = g_ray_model_state.frame.models + i;
ASSERT(model->model);
ASSERT(model->model->as != VK_NULL_HANDLE);
inst[i] = (VkAccelerationStructureInstanceKHR){
.instanceCustomIndex = model->model->kusochki_offset,
.mask = (model->translucent ? 0 : GEOMETRY_BIT_OPAQUE) | GEOMETRY_BIT_ANY,
.instanceShaderBindingTableRecordOffset = model->alpha_test ? 1 : 0,
.flags = model->render_mode == kRenderNormal ? VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR : VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR, // TODO is render_mode a good indicator of transparency in general case?
.accelerationStructureReference = getASAddress(model->model->as), // TODO cache this addr
};
memcpy(&inst[i].transform, model->transform_row, sizeof(VkTransformMatrixKHR));
}
}
// Barrier for building all BLASes
// BLAS building is now in cmdbuf, need to synchronize with results
{
VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, // | VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR,
.buffer = g_rtx.accels_buffer.buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
} };
vkCmdPipelineBarrier(cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
// 2. Build TLAS
createTlas(cmdbuf);
}
static void updateDescriptors( VkCommandBuffer cmdbuf, const vk_ray_frame_render_args_t *args, const vk_image_t *frame_src, const vk_image_t *frame_dst ) {
// 3. Update descriptor sets (bind dest image, tlas, projection matrix)
VkDescriptorImageInfo dii_all_textures[MAX_TEXTURES];
g_rtx.desc_values[RayDescBinding_DestImage].image = (VkDescriptorImageInfo){
.sampler = VK_NULL_HANDLE,
.imageView = frame_dst->view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
g_rtx.desc_values[RayDescBinding_PrevFrame].image = (VkDescriptorImageInfo){
.sampler = VK_NULL_HANDLE,
.imageView = frame_src->view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
g_rtx.desc_values[RayDescBinding_TLAS].accel = (VkWriteDescriptorSetAccelerationStructureKHR){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
.accelerationStructureCount = 1,
.pAccelerationStructures = &g_rtx.tlas,
};
g_rtx.desc_values[RayDescBinding_UBOMatrices].buffer = (VkDescriptorBufferInfo){
.buffer = args->ubo.buffer,
.offset = args->ubo.offset,
.range = args->ubo.size,
};
g_rtx.desc_values[RayDescBinding_Kusochki].buffer = (VkDescriptorBufferInfo){
.buffer = g_ray_model_state.kusochki_buffer.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE, // TODO fails validation when empty g_rtx_scene.num_models * sizeof(vk_kusok_data_t),
};
g_rtx.desc_values[RayDescBinding_Indices].buffer = (VkDescriptorBufferInfo){
.buffer = args->geometry_data.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE, // TODO fails validation when empty args->geometry_data.size,
};
g_rtx.desc_values[RayDescBinding_Vertices].buffer = (VkDescriptorBufferInfo){
.buffer = args->geometry_data.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE, // TODO fails validation when empty args->geometry_data.size,
};
g_rtx.desc_values[RayDescBinding_Textures].image_array = dii_all_textures;
// TODO: move this to vk_texture.c
for (int i = 0; i < MAX_TEXTURES; ++i) {
const vk_texture_t *texture = findTexture(i);
const qboolean exists = texture->vk.image_view != VK_NULL_HANDLE;
dii_all_textures[i].sampler = vk_core.default_sampler; // FIXME on AMD using pImmutableSamplers leads to NEAREST filtering ??. VK_NULL_HANDLE;
dii_all_textures[i].imageView = exists ? texture->vk.image_view : findTexture(tglob.defaultTexture)->vk.image_view;
ASSERT(dii_all_textures[i].imageView != VK_NULL_HANDLE);
dii_all_textures[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
g_rtx.desc_values[RayDescBinding_Lights].buffer = (VkDescriptorBufferInfo){
.buffer = g_ray_model_state.lights_buffer.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
g_rtx.desc_values[RayDescBinding_LightClusters].buffer = (VkDescriptorBufferInfo){
.buffer = g_rtx.light_grid_buffer.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
VK_DescriptorsWrite(&g_rtx.descriptors);
}
static qboolean rayTrace( VkCommandBuffer cmdbuf, VkImage frame_dst, float fov_angle_y )
{
// 4. Barrier for TLAS build and dest image layout transfer
{
VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.buffer = g_rtx.accels_buffer.buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
} };
VkImageMemoryBarrier image_barrier[] = { {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = frame_dst,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.subresourceRange = (VkImageSubresourceRange) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
}} };
vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
0, NULL, ARRAYSIZE(bmb), bmb, ARRAYSIZE(image_barrier), image_barrier);
}
// 4. dispatch ray tracing
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, g_rtx.pipeline);
{
vk_rtx_push_constants_t push_constants = {
//.t = gpGlobals->realtime,
.random_seed = (uint32_t)gEngine.COM_RandomLong(0, INT32_MAX),
.bounces = vk_rtx_bounces->value,
.prev_frame_blend_factor = vk_rtx_prev_frame_blend_factor->value,
.pixel_cone_spread_angle = atanf((2.0f*tanf(fov_angle_y * 0.5f)) / (float)FRAME_HEIGHT),
.debug_light_index_begin = (uint32_t)(vk_rtx_light_begin->value),
.debug_light_index_end = (uint32_t)(vk_rtx_light_end->value),
};
vkCmdPushConstants(cmdbuf, g_rtx.descriptors.pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push_constants), &push_constants);
}
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, g_rtx.descriptors.pipeline_layout, 0, 1, g_rtx.descriptors.desc_sets + 0, 0, NULL);
{
const uint32_t sbt_record_size = g_rtx.sbt_record_size;
//const uint32_t sbt_record_size = vk_core.physical_device.properties_ray_tracing_pipeline.shaderGroupHandleSize;
#define SBT_INDEX(index, count) { \
.deviceAddress = getBufferDeviceAddress(g_rtx.sbt_buffer.buffer) + g_rtx.sbt_record_size * index, \
.size = sbt_record_size * count, \
.stride = sbt_record_size, \
}
const VkStridedDeviceAddressRegionKHR sbt_raygen = SBT_INDEX(ShaderBindingTable_RayGen, 1);
const VkStridedDeviceAddressRegionKHR sbt_miss = SBT_INDEX(ShaderBindingTable_Miss, 2);
const VkStridedDeviceAddressRegionKHR sbt_hit = SBT_INDEX(ShaderBindingTable_Hit, 2);
const VkStridedDeviceAddressRegionKHR sbt_callable = { 0 };
vkCmdTraceRaysKHR(cmdbuf, &sbt_raygen, &sbt_miss, &sbt_hit, &sbt_callable, FRAME_WIDTH, FRAME_HEIGHT, 1 );
}
return true;
}
// Finalize and update dynamic lights
static void updateLights( void )
{
VK_LightsFrameFinalize();
// Upload light grid
{
vk_ray_shader_light_grid *grid = g_rtx.light_grid_buffer.mapped;
ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);
VectorCopy(g_lights.map.grid_min_cell, grid->min_cell);
VectorCopy(g_lights.map.grid_size, grid->size);
memcpy(grid->cells, g_lights.cells, g_lights.map.grid_cells * sizeof(vk_lights_cell_t));
}
// Upload dynamic emissive kusochki
{
vk_lights_buffer_t *ek = g_ray_model_state.lights_buffer.mapped;
ASSERT(g_lights.num_emissive_surfaces <= MAX_EMISSIVE_KUSOCHKI);
ek->num_kusochki = g_lights.num_emissive_surfaces;
for (int i = 0; i < g_lights.num_emissive_surfaces; ++i) {
ek->kusochki[i].kusok_index = g_lights.emissive_surfaces[i].kusok_index;
Matrix3x4_Copy(ek->kusochki[i].transform, g_lights.emissive_surfaces[i].transform);
}
ek->num_point_lights = g_lights.num_point_lights;
for (int i = 0; i < g_lights.num_point_lights; ++i) {
Vector4Copy(g_lights.point_lights[i].origin, ek->point_lights[i].position);
Vector4Copy(g_lights.point_lights[i].color, ek->point_lights[i].color);
}
//VectorCopy(g_lights.map.sun_color, ek->sun_color);
VectorScale(g_lights.map.sun_color, 50, ek->sun_color);
VectorCopy(g_lights.map.sun_dir, ek->sun_dir);
}
}
static void blitImage( VkCommandBuffer cmdbuf, VkImage src, VkImage dst, int src_width, int src_height, int dst_width, int dst_height )
{
// Blit raytraced image to frame buffer
{
VkImageBlit region = {0};
region.srcOffsets[1].x = src_width;
region.srcOffsets[1].y = src_height;
region.srcOffsets[1].z = 1;
region.dstOffsets[1].x = dst_width;
region.dstOffsets[1].y = dst_height;
region.dstOffsets[1].z = 1;
region.srcSubresource.aspectMask = region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.srcSubresource.layerCount = region.dstSubresource.layerCount = 1;
vkCmdBlitImage(cmdbuf, src, VK_IMAGE_LAYOUT_GENERAL,
dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region,
VK_FILTER_NEAREST);
}
{
VkImageMemoryBarrier image_barriers[] = {
{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = dst,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.subresourceRange =
(VkImageSubresourceRange){
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
}};
vkCmdPipelineBarrier(cmdbuf,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
0, 0, NULL, 0, NULL, ARRAYSIZE(image_barriers), image_barriers);
}
}
static void clearVkImage( VkCommandBuffer cmdbuf, VkImage image ) {
const VkImageMemoryBarrier image_barriers[] = { {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = image,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.subresourceRange = (VkImageSubresourceRange) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
}} };
const VkClearColorValue clear_value = {0};
vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
0, NULL, 0, NULL, ARRAYSIZE(image_barriers), image_barriers);
vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &image_barriers->subresourceRange);
}
void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
{
const VkCommandBuffer cmdbuf = args->cmdbuf;
const vk_image_t* frame_src = g_rtx.frames + ((g_rtx.frame_number + 1) % 2);
const vk_image_t* frame_dst = g_rtx.frames + (g_rtx.frame_number % 2);
ASSERT(vk_core.rtx);
// ubo should contain two matrices
// FIXME pass these matrices explicitly to let RTX module handle ubo itself
ASSERT(args->ubo.size == sizeof(float) * 16 * 2);
g_rtx.frame_number++;
if (vk_core.debug)
XVK_RayModel_Validate();
if (g_rtx.reload_pipeline) {
gEngine.Con_Printf(S_WARN "Reloading RTX shaders/pipelines\n");
// TODO gracefully handle reload errors: need to change createPipeline, loadShader, VK_PipelineCreate...
vkDestroyPipeline(vk_core.device, g_rtx.pipeline, NULL);
createPipeline();
g_rtx.reload_pipeline = false;
}
updateLights();
if (g_ray_model_state.frame.num_models == 0)
{
clearVkImage( cmdbuf, frame_dst->image );
{
// Prepare destination image for writing
const VkImageMemoryBarrier image_barriers[] = {{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = args->dst.image,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.subresourceRange =
(VkImageSubresourceRange){
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
}};
vkCmdPipelineBarrier(args->cmdbuf,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, ARRAYSIZE(image_barriers), image_barriers);
}
} else {
prepareTlas(cmdbuf);
updateDescriptors(cmdbuf, args, frame_src, frame_dst);
rayTrace(cmdbuf, frame_dst->image, args->fov_angle_y);
// Barrier for frame_dst image
{
const VkImageMemoryBarrier image_barriers[] = {
{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = frame_dst->image,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.subresourceRange =
(VkImageSubresourceRange){
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
},
{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = args->dst.image,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.subresourceRange =
(VkImageSubresourceRange){
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
}};
vkCmdPipelineBarrier(args->cmdbuf,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, ARRAYSIZE(image_barriers), image_barriers);
}
}
// Blit RTX frame onto swapchain image
blitImage(cmdbuf, frame_src->image, args->dst.image, FRAME_WIDTH, FRAME_HEIGHT, args->dst.width, args->dst.height);
}
static void createLayouts( void ) {
//VkSampler samplers[MAX_TEXTURES];
g_rtx.descriptors.bindings = g_rtx.desc_bindings;
g_rtx.descriptors.num_bindings = ARRAYSIZE(g_rtx.desc_bindings);
g_rtx.descriptors.values = g_rtx.desc_values;
g_rtx.descriptors.num_sets = 1;
g_rtx.descriptors.desc_sets = g_rtx.desc_sets;
g_rtx.descriptors.push_constants = (VkPushConstantRange){
.offset = 0,
.size = sizeof(vk_rtx_push_constants_t),
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_DestImage] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_DestImage,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_TLAS] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_TLAS,
.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_UBOMatrices] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_UBOMatrices,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_Kusochki] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_Kusochki,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_Indices] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_Indices,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_Vertices] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_Vertices,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_Textures] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_Textures,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = MAX_TEXTURES,
.stageFlags = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
// FIXME on AMD using immutable samplers leads to nearest filtering ???!
.pImmutableSamplers = NULL, //samplers,
};
// for (int i = 0; i < ARRAYSIZE(samplers); ++i)
// samplers[i] = vk_core.default_sampler;
g_rtx.desc_bindings[RayDescBinding_Lights] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_Lights,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_LightClusters] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_LightClusters,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
};
g_rtx.desc_bindings[RayDescBinding_PrevFrame] = (VkDescriptorSetLayoutBinding){
.binding = RayDescBinding_PrevFrame,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
};
VK_DescriptorsCreate(&g_rtx.descriptors);
}
static void reloadPipeline( void ) {
g_rtx.reload_pipeline = true;
}
static void reloadLighting( void ) {
g_rtx.reload_lighting = true;
}
static void freezeModels( void ) {
g_ray_model_state.freeze_models = !g_ray_model_state.freeze_models;
}
qboolean VK_RayInit( void )
{
ASSERT(vk_core.rtx);
// TODO complain and cleanup on failure
//g_rtx.sbt_record_size = ALIGN_UP(vk_core.physical_device.properties_ray_tracing_pipeline.shaderGroupHandleSize, vk_core.physical_device.properties_ray_tracing_pipeline.shaderGroupHandleAlignment);
g_rtx.sbt_record_size = ALIGN_UP(vk_core.physical_device.properties_ray_tracing_pipeline.shaderGroupHandleSize, vk_core.physical_device.properties_ray_tracing_pipeline.shaderGroupBaseAlignment);
if (!createBuffer("ray sbt_buffer", &g_rtx.sbt_buffer, ShaderBindingTable_COUNT * g_rtx.sbt_record_size,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
{
return false;
}
if (!createBuffer("ray accels_buffer", &g_rtx.accels_buffer, MAX_ACCELS_BUFFER,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
))
{
return false;
}
g_rtx.accels_buffer_addr = getBufferDeviceAddress(g_rtx.accels_buffer.buffer);
g_rtx.accels_buffer_alloc.size = g_rtx.accels_buffer.size;
if (!createBuffer("ray scratch_buffer", &g_rtx.scratch_buffer, MAX_SCRATCH_BUFFER,
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
)) {
return false;
}
g_rtx.scratch_buffer_addr = getBufferDeviceAddress(g_rtx.scratch_buffer.buffer);
if (!createBuffer("ray tlas_geom_buffer", &g_rtx.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_ACCELS,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// FIXME complain, handle
return false;
}
if (!createBuffer("ray kusochki_buffer", &g_ray_model_state.kusochki_buffer, sizeof(vk_kusok_data_t) * MAX_KUSOCHKI,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT /* | VK_BUFFER_USAGE_TRANSFER_DST_BIT */,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// FIXME complain, handle
return false;
}
g_ray_model_state.kusochki_alloc.size = MAX_KUSOCHKI;
if (!createBuffer("ray lights_buffer", &g_ray_model_state.lights_buffer, sizeof(vk_lights_buffer_t),
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT /* | VK_BUFFER_USAGE_TRANSFER_DST_BIT */,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// FIXME complain, handle
return false;
}
if (!createBuffer("ray light_grid_buffer", &g_rtx.light_grid_buffer, sizeof(vk_ray_shader_light_grid),
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT /* | VK_BUFFER_USAGE_TRANSFER_DST_BIT */,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// FIXME complain, handle
return false;
}
createLayouts();
createPipeline();
for (int i = 0; i < ARRAYSIZE(g_rtx.frames); ++i) {
g_rtx.frames[i] = VK_ImageCreate(FRAME_WIDTH, FRAME_HEIGHT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
}
// Start with black previous frame
{
const VkCommandBufferBeginInfo beginfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
XVK_CHECK(vkBeginCommandBuffer(vk_core.cb, &beginfo));
clearVkImage( vk_core.cb, g_rtx.frames[1].image );
XVK_CHECK(vkEndCommandBuffer(vk_core.cb));
{
const VkSubmitInfo subinfo = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &vk_core.cb,
};
XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, VK_NULL_HANDLE));
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
}
}
if (vk_core.debug) {
gEngine.Cmd_AddCommand("vk_rtx_reload", reloadPipeline, "Reload RTX shader");
gEngine.Cmd_AddCommand("vk_rtx_reload_rad", reloadLighting, "Reload RAD files for static lights");
gEngine.Cmd_AddCommand("vk_rtx_freeze", freezeModels, "Freeze models, do not update/add/delete models from to-draw list");
}
return true;
}
void VK_RayShutdown( void )
{
ASSERT(vk_core.rtx);
for (int i = 0; i < ARRAYSIZE(g_rtx.frames); ++i)
VK_ImageDestroy(g_rtx.frames + i);
vkDestroyPipeline(vk_core.device, g_rtx.pipeline, NULL);
VK_DescriptorsDestroy(&g_rtx.descriptors);
if (g_rtx.tlas != VK_NULL_HANDLE)
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.tlas, NULL);
for (int i = 0; i < ARRAYSIZE(g_ray_model_state.models_cache); ++i) {
vk_ray_model_t *model = g_ray_model_state.models_cache + i;
if (model->as != VK_NULL_HANDLE)
vkDestroyAccelerationStructureKHR(vk_core.device, model->as, NULL);
model->as = VK_NULL_HANDLE;
}
destroyBuffer(&g_rtx.scratch_buffer);
destroyBuffer(&g_rtx.accels_buffer);
destroyBuffer(&g_rtx.tlas_geom_buffer);
destroyBuffer(&g_ray_model_state.kusochki_buffer);
destroyBuffer(&g_ray_model_state.lights_buffer);
destroyBuffer(&g_rtx.light_grid_buffer);
destroyBuffer(&g_rtx.sbt_buffer);
}