xash3d-fwgs/ref_vk/vk_rtx.c
2022-10-22 14:52:24 -07:00

456 lines
14 KiB
C

#include "vk_rtx.h"
#include "ray_pass.h"
#include "ray_resources.h"
#include "vk_ray_accel.h"
#include "vk_buffer.h"
#include "vk_common.h"
#include "vk_core.h"
#include "vk_cvar.h"
#include "vk_descriptor.h"
#include "vk_light.h"
#include "vk_math.h"
#include "vk_meatpipe.h"
#include "vk_pipeline.h"
#include "vk_ray_internal.h"
#include "vk_staging.h"
#include "vk_textures.h"
#include "alolcator.h"
#include "eiface.h"
#include "xash3d_mathlib.h"
#include <string.h>
// Number of per-frame resource sets kept by this module: it sizes g_rtx.frames
// and the number of UniformBuffer slots allocated in VK_RayInit().
#define MAX_FRAMES_IN_FLIGHT 2
// Fixed internal ray tracing resolution. All per-frame images are created at
// this size and the result is blitted to the destination image afterwards.
// The #if/#elif/#else chain is a manual compile-time switch; the first branch
// (1280x720) is the active one.
// TODO settings/realtime modifiable/adaptive
#if 1
#define FRAME_WIDTH 1280
#define FRAME_HEIGHT 720
#elif 0
#define FRAME_WIDTH 2560
#define FRAME_HEIGHT 1440
#else
#define FRAME_WIDTH 1920
#define FRAME_HEIGHT 1080
#endif
// NOTE(review): WG_W/WG_H are not referenced in the visible part of this file;
// presumably compute workgroup dimensions that must match the shaders -- confirm.
// TODO sync with shaders
// TODO optimal values
#define WG_W 16
#define WG_H 8
// Light record: position + radius followed by color + one explicit padding float.
// NOTE(review): not referenced anywhere in the visible part of this file; the
// explicit padding_ member suggests the layout mirrors a GPU-side structure --
// confirm before changing or removing.
typedef struct {
vec3_t pos;
float radius;
vec3_t color;
float padding_; // explicit padding, never read in the visible code
} vk_light_t;
// Module-local alias for struct PushConstants, which is declared outside this file.
// NOTE(review): the alias is not used in the visible part of this file.
typedef struct PushConstants vk_rtx_push_constants_t;
// Set of images owned by one in-flight frame. Every member is created in
// VK_RayInit() at FRAME_WIDTH x FRAME_HEIGHT and destroyed in VK_RayShutdown().
typedef struct {
// Final image of the frame; it is the source of the blit to the destination image.
xvk_image_t denoised;
// One xvk_image_t member per entry of the RAY_*_OUTPUTS X-macro lists
// (the lists themselves are defined outside this file).
#define X(index, name, ...) xvk_image_t name;
RAY_PRIMARY_OUTPUTS(X)
RAY_LIGHT_DIRECT_POLY_OUTPUTS(X)
RAY_LIGHT_DIRECT_POINT_OUTPUTS(X)
#undef X
// NOTE(review): the three images below are created and destroyed by this file
// but are not bound as ray resources in performTracing() -- confirm whether
// they are still needed.
xvk_image_t diffuse_gi;
xvk_image_t specular;
xvk_image_t additive;
} xvk_ray_frame_images_t;
// File-scope state of the ray tracing module.
static struct {
// Holds UniformBuffer data
// Host-mapped; contains MAX_FRAMES_IN_FLIGHT slots of uniform_unit_size bytes each.
vk_buffer_t uniform_buffer;
// sizeof(struct UniformBuffer) rounded up to minUniformBufferOffsetAlignment
// (computed in VK_RayInit()).
uint32_t uniform_unit_size;
// TODO with proper intra-cmdbuf sync we don't really need 2x images
// Incremented once per VK_RayFrameEnd() call; used to pick the per-frame slot.
unsigned frame_number;
xvk_ray_frame_images_t frames[MAX_FRAMES_IN_FLIGHT];
// Pipeline loaded from "rt.meat"; executed by performTracing().
vk_meatpipe_t mainpipe;
// Set by the "vk_rtx_reload" console command, consumed in VK_RayFrameEnd().
qboolean reload_pipeline;
// Set by the "vk_rtx_reload_rad" console command, cleared in VK_RayFrameBegin().
qboolean reload_lighting;
} g_rtx = {0};
// Map change hook: forwards the event to the acceleration structure module
// and then clears the ray model state.
void VK_RayNewMap( void )
{
	RT_VkAccelNewMap();
	RT_RayModel_Clear();
}
// Per-frame setup for ray tracing. Must only be called when ray tracing is
// enabled (asserted). The acceleration structure module is always notified;
// the remaining per-frame resets are skipped while models are frozen.
void VK_RayFrameBegin( void )
{
	ASSERT(vk_core.rtx);

	RT_VkAccelFrameBegin();

	if (!g_ray_model_state.freeze_models) {
		XVK_RayModel_ClearForNextFrame();

		// TODO: move all lighting update to scene?
		if (g_rtx.reload_lighting) {
			// The request is acknowledged, but the reload itself is currently disabled.
			// FIXME temporarily not supported VK_LightsLoadMapStaticLights();
			g_rtx.reload_lighting = false;
		}

		// TODO shouldn't we do this in freeze models mode anyway?
		RT_LightsFrameBegin();
	}
}
// Fills the UniformBuffer slot of the given frame directly in host-mapped memory.
//   args        - supplies the projection and view matrices to invert
//   frame_index - selects the slot (frame_index * uniform_unit_size bytes into the buffer)
//   fov_angle_y - vertical field of view in degrees (converted with DEG2RAD)
static void prepareUniformBuffer( const vk_ray_frame_render_args_t *args, int frame_index, float fov_angle_y ) {
	char *const mapped_base = (char*)g_rtx.uniform_buffer.mapped;
	struct UniformBuffer *const ubo = (struct UniformBuffer*)(mapped_base + frame_index * g_rtx.uniform_unit_size);

	matrix4x4 inverse_projection;
	Matrix4x4_Invert_Full(inverse_projection, *args->projection);
	Matrix4x4_ToArrayFloatGL(inverse_projection, (float*)ubo->inv_proj);

	// TODO there's a more efficient way to construct an inverse view matrix
	// from vforward/right/up vectors and origin in g_camera
	matrix4x4 inverse_view;
	Matrix4x4_Invert_Full(inverse_view, *args->view);
	Matrix4x4_ToArrayFloatGL(inverse_view, (float*)ubo->inv_view);

	// Angle covered by a single pixel row: full vertical extent at unit distance
	// divided by the frame height.
	const float half_fov_tan = tanf(DEG2RAD(fov_angle_y) * 0.5f);
	ubo->ray_cone_width = atanf((2.0f * half_fov_tan) / (float)FRAME_HEIGHT);

	// Fresh seed every frame
	ubo->random_seed = (uint32_t)gEngine.COM_RandomLong(0, INT32_MAX);
}
// Argument bundle for performTracing(); filled in by VK_RayFrameEnd().
typedef struct {
// Frame arguments from the caller: matrices, geometry buffer and blit destination.
const vk_ray_frame_render_args_t* render_args;
// Selects the UniformBuffer slot; also passed on to R_VkMeatpipePerform().
int frame_index;
// Image set that receives the outputs of this frame.
const xvk_ray_frame_images_t *current_frame;
// Vertical field of view, in degrees (converted with DEG2RAD in prepareUniformBuffer()).
float fov_angle_y;
// Buffer and sub-ranges for light metadata and the light grid.
const vk_lights_bindings_t *light_bindings;
} perform_tracing_args_t;
// Records one ray traced frame into cmdbuf:
//   1. describes every resource the pipeline may bind (TLAS, buffers, textures, per-frame images),
//   2. makes transfer writes to the kusochki buffer visible,
//   3. prepares the TLAS and fills the uniform buffer slot (a CPU-side write to mapped memory),
//   4. makes TLAS and light buffer writes visible to the tracing shaders,
//   5. runs the main pipeline,
//   6. blits the denoised image to the destination image.
//
// cmdbuf - command buffer in recording state
// args   - per-frame inputs, see perform_tracing_args_t
static void performTracing(VkCommandBuffer cmdbuf, const perform_tracing_args_t* args) {
	vk_ray_resources_t res = {
		.width = FRAME_WIDTH,
		.height = FRAME_HEIGHT,
		.resources = {
			[RayResource_tlas] = {
				.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
				.value.accel = (VkWriteDescriptorSetAccelerationStructureKHR){
					.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
					.accelerationStructureCount = 1,
					.pAccelerationStructures = &g_accel.tlas,
					.pNext = NULL,
				},
			},
#define RES_SET_BUFFER(name, type_, source_, offset_, size_) \
	[RayResource_##name] = { \
		.type = type_, \
		.value.buffer = (VkDescriptorBufferInfo) { \
			.buffer = (source_), \
			.offset = (offset_), \
			.range = (size_), \
		} \
	}
			RES_SET_BUFFER(ubo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, g_rtx.uniform_buffer.buffer, args->frame_index * g_rtx.uniform_unit_size, sizeof(struct UniformBuffer)),

#define RES_SET_SBUFFER_FULL(name, source_) \
	RES_SET_BUFFER(name, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, source_.buffer, 0, source_.size)
			RES_SET_SBUFFER_FULL(kusochki, g_ray_model_state.kusochki_buffer),
			// Indices and vertices are both bound to the whole shared geometry buffer
			RES_SET_SBUFFER_FULL(indices, args->render_args->geometry_data),
			RES_SET_SBUFFER_FULL(vertices, args->render_args->geometry_data),
			RES_SET_BUFFER(lights, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->metadata.offset, args->light_bindings->metadata.size),
			RES_SET_BUFFER(light_clusters, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->grid.offset, args->light_bindings->grid.size),
#undef RES_SET_SBUFFER_FULL
#undef RES_SET_BUFFER

			[RayResource_all_textures] = {
				.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
				.value.image_array = tglob.dii_all_textures,
			},

			[RayResource_skybox] = {
				.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
				.value.image = {
					.sampler = vk_core.default_sampler,
					// Fall back to the placeholder cubemap when no skybox view is available
					.imageView = tglob.skybox_cube.vk.image.view ? tglob.skybox_cube.vk.image.view : tglob.cubemap_placeholder.vk.image.view,
					.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
				},
			},

#define RES_SET_IMAGE(index, name, ...) \
	[RayResource_##name] = { \
		.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, \
		.write = {0}, \
		.read = {0}, \
		.image = &args->current_frame->name, \
	},
			RAY_PRIMARY_OUTPUTS(RES_SET_IMAGE)
			RAY_LIGHT_DIRECT_POLY_OUTPUTS(RES_SET_IMAGE)
			RAY_LIGHT_DIRECT_POINT_OUTPUTS(RES_SET_IMAGE)
			RES_SET_IMAGE(-1, denoised)
#undef RES_SET_IMAGE
		},
	};

	// Upload kusochki updates
	{
		const VkBufferMemoryBarrier bmb[] = { {
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
			.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR,
			.buffer = g_ray_model_state.kusochki_buffer.buffer,
			.offset = 0,
			.size = VK_WHOLE_SIZE,
		} };

		vkCmdPipelineBarrier(cmdbuf,
			VK_PIPELINE_STAGE_TRANSFER_BIT,
			VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
	}

	DEBUG_BEGIN(cmdbuf, "yay tracing");

	RT_VkAccelPrepareTlas(cmdbuf);
	prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);

	// 4. Barrier for TLAS build
	{
		const VkBufferMemoryBarrier bmb[] = { {
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
			// Traversing an acceleration structure while tracing is an
			// ACCELERATION_STRUCTURE_READ access, which SHADER_READ does not cover.
			// SHADER_READ is kept for any plain buffer reads from the same buffer.
			.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT,
			.buffer = g_accel.accels_buffer.buffer,
			.offset = 0,
			.size = VK_WHOLE_SIZE,
		} };

		vkCmdPipelineBarrier(cmdbuf,
			VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
			VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
			0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
	}

	{ // FIXME this should be done automatically inside meatpipe, TODO
		// Make the uploaded light data visible to the tracing shaders
		const VkBufferMemoryBarrier bmb[] = {{
			.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
			.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
			.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
			.buffer = args->light_bindings->buffer,
			.offset = 0,
			.size = VK_WHOLE_SIZE,
		}};

		vkCmdPipelineBarrier(cmdbuf,
			VK_PIPELINE_STAGE_TRANSFER_BIT,
			VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
			0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
	}

	R_VkMeatpipePerform(&g_rtx.mainpipe, cmdbuf, args->frame_index, &res);

	// Copy the final image to the destination, scaling from the internal resolution
	{
		const r_vkimage_blit_args blit_args = {
			.in_stage = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
			.src = {
				.image = args->current_frame->denoised.image,
				.width = FRAME_WIDTH,
				.height = FRAME_HEIGHT,
				.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
				.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
			},
			.dst = {
				.image = args->render_args->dst.image,
				.width = args->render_args->dst.width,
				.height = args->render_args->dst.height,
				// Previous destination contents are not needed
				.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
				.srcAccessMask = 0,
			},
		};

		R_VkImageBlit( cmdbuf, &blit_args );
	}

	DEBUG_END(cmdbuf);
}
void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
{
const VkCommandBuffer cmdbuf = args->cmdbuf;
const xvk_ray_frame_images_t* current_frame = g_rtx.frames + (g_rtx.frame_number % 2);
ASSERT(vk_core.rtx);
// ubo should contain two matrices
// FIXME pass these matrices explicitly to let RTX module handle ubo itself
RT_LightsFrameEnd();
const vk_lights_bindings_t light_bindings = VK_LightsUpload(cmdbuf);
g_rtx.frame_number++;
// if (vk_core.debug)
// XVK_RayModel_Validate();
if (g_rtx.reload_pipeline) {
gEngine.Con_Printf(S_WARN "Reloading RTX shaders/pipelines\n");
XVK_CHECK(vkDeviceWaitIdle(vk_core.device));
vk_meatpipe_t newpipe;
if (R_VkMeatpipeLoad(&newpipe, "rt.meat")) {
R_VkMeatpipeDestroy(&g_rtx.mainpipe);
g_rtx.mainpipe = newpipe;
}
g_rtx.reload_pipeline = false;
}
if (g_ray_model_state.frame.num_models == 0) {
const r_vkimage_blit_args blit_args = {
.in_stage = VK_PIPELINE_STAGE_TRANSFER_BIT,
.src = {
.image = current_frame->denoised.image,
.width = FRAME_WIDTH,
.height = FRAME_HEIGHT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
},
.dst = {
.image = args->dst.image,
.width = args->dst.width,
.height = args->dst.height,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.srcAccessMask = 0,
},
};
R_VkImageClear( cmdbuf, current_frame->denoised.image );
R_VkImageBlit( cmdbuf, &blit_args );
} else {
const perform_tracing_args_t trace_args = {
.render_args = args,
.frame_index = (g_rtx.frame_number % 2),
.current_frame = current_frame,
.fov_angle_y = args->fov_angle_y,
.light_bindings = &light_bindings,
};
performTracing( cmdbuf, &trace_args );
}
}
// Console command handler ("vk_rtx_reload"): only raises a flag; the actual
// reload happens in VK_RayFrameEnd().
static void reloadPipeline( void )
{
	g_rtx.reload_pipeline = true;
}
// Console command handler ("vk_rtx_reload_rad"): only raises a flag, which is
// consumed in VK_RayFrameBegin().
static void reloadLighting( void )
{
	g_rtx.reload_lighting = true;
}
static void freezeModels( void ) {
g_ray_model_state.freeze_models = !g_ray_model_state.freeze_models;
}
qboolean VK_RayInit( void )
{
ASSERT(vk_core.rtx);
// TODO complain and cleanup on failure
if (!RT_VkAccelInit())
return false;
ASSERT(R_VkMeatpipeLoad(&g_rtx.mainpipe, "rt.meat"));
g_rtx.uniform_unit_size = ALIGN_UP(sizeof(struct UniformBuffer), vk_core.physical_device.properties.limits.minUniformBufferOffsetAlignment);
if (!VK_BufferCreate("ray uniform_buffer", &g_rtx.uniform_buffer, g_rtx.uniform_unit_size * MAX_FRAMES_IN_FLIGHT,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
{
return false;
}
if (!VK_BufferCreate("ray kusochki_buffer", &g_ray_model_state.kusochki_buffer, sizeof(vk_kusok_data_t) * MAX_KUSOCHKI,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
// FIXME complain, handle
return false;
}
RT_RayModel_Clear();
for (int i = 0; i < ARRAYSIZE(g_rtx.frames); ++i) {
#define CREATE_GBUFFER_IMAGE(name, format_, add_usage_bits) \
do { \
char debug_name[64]; \
const xvk_image_create_t create = { \
.debug_name = debug_name, \
.width = FRAME_WIDTH, \
.height = FRAME_HEIGHT, \
.mips = 1, \
.layers = 1, \
.format = format_, \
.tiling = VK_IMAGE_TILING_OPTIMAL, \
.usage = VK_IMAGE_USAGE_STORAGE_BIT | add_usage_bits, \
.has_alpha = true, \
.is_cubemap = false, \
}; \
Q_snprintf(debug_name, sizeof(debug_name), "rtx frames[%d] " # name, i); \
g_rtx.frames[i].name = XVK_ImageCreate(&create); \
} while(0)
CREATE_GBUFFER_IMAGE(denoised, VK_FORMAT_R16G16B16A16_SFLOAT, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
#define rgba8 VK_FORMAT_R8G8B8A8_UNORM
#define rgba32f VK_FORMAT_R32G32B32A32_SFLOAT
#define rgba16f VK_FORMAT_R16G16B16A16_SFLOAT
#define X(index, name, format) CREATE_GBUFFER_IMAGE(name, format, 0);
// TODO better format for normals VK_FORMAT_R16G16B16A16_SNORM
// TODO make sure this format and usage is suppported
RAY_PRIMARY_OUTPUTS(X)
RAY_LIGHT_DIRECT_POLY_OUTPUTS(X)
RAY_LIGHT_DIRECT_POINT_OUTPUTS(X)
#undef X
#undef rgba8
#undef rgba32f
#undef rgba16f
CREATE_GBUFFER_IMAGE(diffuse_gi, VK_FORMAT_R16G16B16A16_SFLOAT, 0);
CREATE_GBUFFER_IMAGE(specular, VK_FORMAT_R16G16B16A16_SFLOAT, 0);
CREATE_GBUFFER_IMAGE(additive, VK_FORMAT_R16G16B16A16_SFLOAT, 0);
#undef CREATE_GBUFFER_IMAGE
}
gEngine.Cmd_AddCommand("vk_rtx_reload", reloadPipeline, "Reload RTX shader");
gEngine.Cmd_AddCommand("vk_rtx_reload_rad", reloadLighting, "Reload RAD files for static lights");
gEngine.Cmd_AddCommand("vk_rtx_freeze", freezeModels, "Freeze models, do not update/add/delete models from to-draw list");
return true;
}
void VK_RayShutdown( void ) {
ASSERT(vk_core.rtx);
R_VkMeatpipeDestroy(&g_rtx.mainpipe);
/* RayPassDestroy(g_rtx.pass.denoiser); */
/* RayPassDestroy(g_rtx.pass.light_direct_poly); */
/* RayPassDestroy(g_rtx.pass.light_direct_point); */
/* RayPassDestroy(g_rtx.pass.primary_ray); */
for (int i = 0; i < ARRAYSIZE(g_rtx.frames); ++i) {
XVK_ImageDestroy(&g_rtx.frames[i].denoised);
#define X(index, name, ...) XVK_ImageDestroy(&g_rtx.frames[i].name);
RAY_PRIMARY_OUTPUTS(X)
RAY_LIGHT_DIRECT_POLY_OUTPUTS(X)
RAY_LIGHT_DIRECT_POINT_OUTPUTS(X)
#undef X
XVK_ImageDestroy(&g_rtx.frames[i].diffuse_gi);
XVK_ImageDestroy(&g_rtx.frames[i].specular);
XVK_ImageDestroy(&g_rtx.frames[i].additive);
}
VK_BufferDestroy(&g_ray_model_state.kusochki_buffer);
VK_BufferDestroy(&g_rtx.uniform_buffer);
RT_VkAccelShutdown();
}