propagate proj/view matrices to rtx shader

And get RTX picture we deserve (i.e. glitchy and incorrect).
Will fix later.
This commit is contained in:
Ivan 'provod' Avdeev 2021-03-03 11:58:40 -08:00
parent 9b8af6d28b
commit 28cdeaeb1d
7 changed files with 141 additions and 45 deletions

View File

@ -6,6 +6,9 @@ layout(local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
layout(binding = 0, set = 0, rgba8) uniform image2D image;
layout(binding = 1, set = 0) uniform accelerationStructureEXT tlas;
// Camera data for ray generation: inverse projection and inverse view matrices, read by main().
// NOTE(review): the host side appears to memcpy a struct laid out as { proj_inv, view_inv } into
// this buffer, so the member order here has to stay inv_proj first, inv_view second — confirm
// against the C side before reordering.
layout(binding = 2, set = 0) uniform UBO {
mat4 inv_proj, inv_view;
} ubo;
//layout(binding = 2, set = 0) buffer Vertices { vec4 vertices[]; };
@ -16,7 +19,26 @@ layout (push_constant) uniform PC {
// Compute entry point: one invocation per output pixel. Builds a camera ray from the inverse
// projection/view matrices in `ubo`, traces it against `tlas` with a ray query and writes a
// debug color into `image`.
// NOTE(review): `uv` is declared twice and `image` is stored to twice below. This looks like
// before/after line pairs of a diff rendered without +/- markers — confirm which lines are live.
void main() {
vec2 res = imageSize(image);
vec2 uv = gl_GlobalInvocationID.xy / res * .5 - .5;
// Pixel center (+.5) mapped from [0, res] to [-1, 1]
vec2 uv = (gl_GlobalInvocationID.xy + .5) / res * 2. - 1.;
imageStore(image, ivec2(gl_GlobalInvocationID.xy), vec4(res, 0., 1.));
// Ray origin: the point (0, 0, 0) transformed by the inverse view matrix
vec4 origin = ubo.inv_view * vec4(0, 0, 0, 1);
// Un-project the pixel at z = 1; only the normalized xyz of the result is used (no divide by w)
vec4 target = ubo.inv_proj * vec4(uv.x, uv.y, 1, 1);
// w = 0 makes this a direction, so the translation part of inv_view does not apply
vec4 direction = ubo.inv_view * vec4(normalize(target.xyz), 0);
vec3 C = vec3(0.);
vec3 O = origin.xyz, D=direction.xyz;
// Maximum ray distance passed to the ray query
const float L = 10000.;
rayQueryEXT rayQuery;
// All geometry treated as opaque, cull mask 0xff, t range [0, L]
rayQueryInitializeEXT(rayQuery, tlas, gl_RayFlagsOpaqueEXT, 0xff, O, 0., D, L);
// Run traversal to completion; the loop body is intentionally empty
while(rayQueryProceedEXT(rayQuery)) {}
// Distance of the committed intersection; it is only used in the hit branch below
const float l = rayQueryGetIntersectionTEXT(rayQuery, true);
if (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionTriangleEXT) {
// No committed triangle hit: magenta
C = vec3(1., 0., 1.);
} else {
// Hit: color is the fractional part of the hit position
vec3 pos = O+D*l;
C = fract(pos);
}
imageStore(image, ivec2(gl_GlobalInvocationID.xy), vec4(C, 1.));
}

View File

@ -20,6 +20,8 @@ typedef struct vk_global_camera_s {
matrix4x4 projectionMatrix;
matrix4x4 worldviewProjectionMatrix; // worldviewMatrix * projectionMatrix
matrix4x4 projectionMatrixVk;
} vk_global_camera_t;
extern vk_global_camera_t g_camera;

View File

@ -44,7 +44,7 @@ static struct {
uint32_t buffer_frame_begin_offset;
vk_buffer_t uniform_buffer;
uint32_t uniform_unit_size;
uint32_t ubo_align;
struct {
int align_holes_size;
@ -243,16 +243,17 @@ qboolean VK_RenderInit( void )
// TODO Better estimates
const uint32_t vertex_buffer_size = MAX_BUFFER_VERTICES * sizeof(float) * (3 + 3 + 2 + 2);
const uint32_t index_buffer_size = MAX_BUFFER_INDICES * sizeof(uint16_t);
const uint32_t ubo_align = Q_max(4, vk_core.physical_device.properties.limits.minUniformBufferOffsetAlignment);
uint32_t uniform_unit_size;
g_render.uniform_unit_size = ((sizeof(uniform_data_t) + ubo_align - 1) / ubo_align) * ubo_align;
g_render.ubo_align = Q_max(4, vk_core.physical_device.properties.limits.minUniformBufferOffsetAlignment);
uniform_unit_size = ((sizeof(uniform_data_t) + g_render.ubo_align - 1) / g_render.ubo_align) * g_render.ubo_align;
// TODO device memory and friends (e.g. handle mobile memory ...)
if (!createBuffer(&g_render.buffer, vertex_buffer_size + index_buffer_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | (vk_core.rtx ? VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : 0), VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
return false;
if (!createBuffer(&g_render.uniform_buffer, g_render.uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
if (!createBuffer(&g_render.uniform_buffer, uniform_unit_size * MAX_UNIFORM_SLOTS, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
return false;
{
@ -415,12 +416,19 @@ typedef struct {
static struct {
int uniform_data_set_mask;
int next_free_uniform_slot;
uniform_data_t current_uniform_data;
uniform_data_t dirty_uniform_data;
uint32_t current_ubo_offset;
uint32_t uniform_free_offset;
draw_command_t draw_commands[MAX_DRAW_COMMANDS];
int num_draw_commands;
// FIXME vk_rtx-specific
struct {
matrix4x4 proj_inv, view_inv;
} rtx;
} g_render_state;
enum {
@ -432,8 +440,9 @@ enum {
};
void VK_RenderBegin( void ) {
g_render_state.next_free_uniform_slot = 0;
g_render_state.uniform_free_offset = 0;
g_render_state.uniform_data_set_mask = UNIFORM_UNSET;
g_render_state.current_ubo_offset = UINT32_MAX;
memset(&g_render_state.current_uniform_data, 0, sizeof(g_render_state.current_uniform_data));
memset(&g_render_state.dirty_uniform_data, 0, sizeof(g_render_state.dirty_uniform_data));
@ -456,23 +465,33 @@ void VK_RenderStateSetMatrix( const matrix4x4 mvp )
Matrix4x4_ToArrayFloatGL( mvp, (float*)g_render_state.dirty_uniform_data.mvp );
}
static uniform_data_t *getUniformSlot(int index)
void VK_RenderStateSetProjectionMatrix(const matrix4x4 proj)
{
ASSERT(index >= 0);
ASSERT(index < MAX_UNIFORM_SLOTS);
return (uniform_data_t*)(((uint8_t*)g_render.uniform_buffer.mapped) + (g_render.uniform_unit_size * index));
matrix4x4 tmp;
Matrix4x4_Invert_Full(tmp, proj);
Matrix4x4_ToArrayFloatGL( tmp, g_render_state.rtx.proj_inv);
}
static int allocUniformSlot( void ) {
if (g_render_state.next_free_uniform_slot == MAX_UNIFORM_SLOTS)
return -1;
// Stores the inverse of the given view matrix in g_render_state.rtx.view_inv.
//
// view: view matrix to invert; it is only read.
//
// The inverse is written through Matrix4x4_ToArrayFloatGL, the same conversion that
// VK_RenderStateSetMatrix applies to the mvp matrix.
void VK_RenderStateSetViewMatrix(const matrix4x4 view)
{
	matrix4x4 inverse;
	Matrix4x4_Invert_Full(inverse, view);
	// Pass the destination as a flat float array, with the same explicit cast that
	// VK_RenderStateSetMatrix uses. Handing over the matrix4x4 itself relies on an implicit
	// pointer conversion (assumes matrix4x4 is a 2D float array — TODO confirm).
	Matrix4x4_ToArrayFloatGL( inverse, (float*)g_render_state.rtx.view_inv);
}
return g_render_state.next_free_uniform_slot++;
// Reserves `size` bytes in the uniform buffer, starting at the current free offset
// (g_render_state.uniform_free_offset, which VK_RenderBegin resets to 0).
//
// size:      number of bytes to reserve.
// alignment: alignment the caller needs for the returned offset, in bytes; 0 means "none".
//
// Returns the byte offset of the reservation from the start of the uniform buffer, or
// UINT32_MAX if it does not fit. On failure the free offset is left unchanged.
static uint32_t allocUniform( uint32_t size, uint32_t alignment ) {
	// The offset has to be a multiple of both the caller's alignment and g_render.ubo_align,
	// i.e. a multiple of their least common multiple. Taking the larger of the two is only
	// correct when one of them divides the other.
	const uint32_t wanted = alignment ? alignment : 1;
	const uint32_t required = g_render.ubo_align ? g_render.ubo_align : 1;
	uint32_t gcd = wanted, rem = required;
	uint64_t align, offset;

	// Euclid's algorithm: after the loop `gcd` is the greatest common divisor
	while (rem != 0) {
		const uint32_t next = gcd % rem;
		gcd = rem;
		rem = next;
	}

	// 64-bit math so that neither the LCM nor the rounded-up offset can wrap around
	align = (uint64_t)wanted / gcd * required;
	offset = (g_render_state.uniform_free_offset + align - 1) / align * align;

	// UINT32_MAX is the failure value, so it must never be produced by a successful allocation
	if (offset + size > g_render.uniform_buffer.size || offset + size >= UINT32_MAX)
		return UINT32_MAX;

	g_render_state.uniform_free_offset = (uint32_t)(offset + size);
	return (uint32_t)offset;
}
void VK_RenderScheduleDraw( const render_draw_t *draw )
{
int ubo_index = g_render_state.next_free_uniform_slot - 1;
const vk_buffer_alloc_t *vertex_buffer = NULL, *index_buffer = NULL;
draw_command_t *draw_command;
@ -507,15 +526,15 @@ void VK_RenderScheduleDraw( const render_draw_t *draw )
// Figure out whether we need to update UBO data, and upload new data if we do
// TODO generally it's not safe to do memcmp for structures comparison
if (((g_render_state.uniform_data_set_mask & UNIFORM_UPLOADED) == 0) || memcmp(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.current_uniform_data)) != 0) {
if (g_render_state.current_ubo_offset == UINT32_MAX || ((g_render_state.uniform_data_set_mask & UNIFORM_UPLOADED) == 0) || memcmp(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.current_uniform_data)) != 0) {
uniform_data_t *ubo;
ubo_index = allocUniformSlot();
if (ubo_index < 0) {
g_render_state.current_ubo_offset = allocUniform( sizeof(uniform_data_t), 16 );
if (g_render_state.current_ubo_offset == UINT32_MAX) {
gEngine.Con_Printf( S_ERROR "Ran out of uniform slots\n" );
return;
}
ubo = getUniformSlot( ubo_index );
ubo = (uniform_data_t*)((byte*)g_render.uniform_buffer.mapped + g_render_state.current_ubo_offset);
memcpy(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.dirty_uniform_data));
memcpy(ubo, &g_render_state.current_uniform_data, sizeof(*ubo));
g_render_state.uniform_data_set_mask |= UNIFORM_UPLOADED;
@ -523,7 +542,7 @@ void VK_RenderScheduleDraw( const render_draw_t *draw )
draw_command = g_render_state.draw_commands + (g_render_state.num_draw_commands++);
draw_command->draw = *draw;
draw_command->ubo_offset = g_render.uniform_unit_size * ubo_index;
draw_command->ubo_offset = g_render_state.current_ubo_offset;
}
void VK_RenderEnd( VkCommandBuffer cmdbuf )
@ -625,5 +644,33 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage
VK_RayScenePushModel(cmdbuf, &ray_model_args);
}
VK_RaySceneEnd( cmdbuf, img_dst_view, img_dst, w, h );
{
float *matrices = NULL;
const vk_ray_scene_render_args_t args = {
.cmdbuf = cmdbuf,
.dst = {
.image_view = img_dst_view,
.image = img_dst,
.width = w,
.height = h,
},
// FIXME this should really be in vk_rtx, calling vk_render(or what?) to alloc slot for it
.ubo = {
.buffer = g_render.uniform_buffer.buffer,
.offset = allocUniform(sizeof(float) * 16 * 2, 16 * sizeof(float)),
.size = sizeof(float) * 16 * 2,
},
};
if (args.ubo.offset == UINT32_MAX) {
gEngine.Con_Printf(S_ERROR "Cannot allocate UBO for RTX\n");
return;
}
matrices = (byte*)g_render.uniform_buffer.mapped + args.ubo.offset;
memcpy(matrices, &g_render_state.rtx, sizeof(g_render_state.rtx));
VK_RaySceneEnd(&args);
}
}

View File

@ -46,6 +46,9 @@ void VK_RenderStateSetColor( float r, float g, float b, float a );
void VK_RenderStateSetMatrix( const matrix4x4 mvp );
// TODO: set projection and mv matrices separately
void VK_RenderStateSetProjectionMatrix(const matrix4x4 proj);
void VK_RenderStateSetViewMatrix(const matrix4x4 view);
// TODO is this a good place?
typedef struct vk_vertex_s {
vec3_t pos;

View File

@ -201,14 +201,17 @@ void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t *
}
}
void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage img_dst, uint32_t w, uint32_t h )
void VK_RaySceneEnd(const vk_ray_scene_render_args_t* args)
{
ASSERT(vk_core.rtx);
ASSERT(args->ubo.size == sizeof(float) * 16 * 2); // ubo should contain two matrices
const VkCommandBuffer cmdbuf = args->cmdbuf;
// Upload all blas instances references to GPU mem
{
VkAccelerationStructureInstanceKHR *inst = g_rtx.tlas_geom_buffer.mapped;
for (int i = 0; i < g_rtx_scene.num_accels; ++i) {
ASSERT(g_rtx.accels[i] != VK_NULL_HANDLE);
inst[i] = (VkAccelerationStructureInstanceKHR){
.transform = (VkTransformMatrixKHR){1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0},
.instanceCustomIndex = 0,
@ -232,9 +235,9 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i
.size = VK_WHOLE_SIZE,
}};
vkCmdPipelineBarrier(cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
// 2. Create TLAS
@ -255,7 +258,7 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = {g_rtx_scene.num_accels};
const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
.primitiveCount = g_rtx_scene.num_accels,
.primitiveCount = g_rtx_scene.num_accels,
};
const VkAccelerationStructureBuildRangeInfoKHR *tl_build_ranges[] = {&tl_build_range};
g_rtx.tlas = createAndBuildAccelerationStructure(cmdbuf,
@ -266,16 +269,14 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i
{
const VkDescriptorImageInfo dii = {
.sampler = VK_NULL_HANDLE,
.imageView = img_dst_view,
.imageView = args->dst.image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
/*
const VkDescriptorBufferInfo dbi = {
.buffer = g_rtx.tri_buf.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
.buffer = args->ubo.buffer,
.offset = args->ubo.offset,
.range = args->ubo.size,
};
*/
const VkWriteDescriptorSetAccelerationStructureKHR wdsas = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
.accelerationStructureCount = 1,
@ -291,16 +292,15 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i
.dstArrayElement = 0,
.pImageInfo = &dii,
},
/*
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.dstSet = g_rtx.desc_set,
.dstBinding = 2,
.dstArrayElement = 0,
.pBufferInfo = &dbi,
},*/
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.descriptorCount = 1,
@ -328,7 +328,7 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i
}};
VkImageMemoryBarrier image_barrier[] = { {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.image = img_dst,
.image = args->dst.image,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
@ -348,7 +348,7 @@ void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage i
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, g_rtx.pipeline);
//vkCmdPushConstants(cmdbuf, g_rtx.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(matrix4x4), mvp);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, g_rtx.pipeline_layout, 0, 1, &g_rtx.desc_set, 0, NULL);
vkCmdDispatch(cmdbuf, (w+WG_W-1)/WG_W, (h+WG_H-1)/WG_H, 1);
vkCmdDispatch(cmdbuf, (args->dst.width+WG_W-1)/WG_W, (args->dst.height+WG_H-1)/WG_H, 1);
}
static void createLayouts( void ) {
@ -362,10 +362,11 @@ static void createLayouts( void ) {
.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
/* }, { */ /* .binding = 2, */
/* .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, */
/* .descriptorCount = 1, */
/* .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, */
}, {
.binding = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
@ -391,7 +392,7 @@ static void createLayouts( void ) {
{
VkDescriptorPoolSize pools[] = {
{.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .descriptorCount = 1},
//{.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1},
{.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, .descriptorCount = 1},
{.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, .descriptorCount = 1},
};

View File

@ -18,7 +18,24 @@ vk_ray_model_handle_t VK_RayModelCreate( const vk_ray_model_create_t *args );
void VK_RaySceneBegin( void );
void VK_RayScenePushModel(VkCommandBuffer cmdbuf, const vk_ray_model_create_t* model); // vk_ray_model_handle_t model );
void VK_RaySceneEnd(VkCommandBuffer cmdbuf, VkImageView img_dst_view, VkImage img_dst, uint32_t w, uint32_t h);
// Arguments for VK_RaySceneEnd, bundled into one struct.
typedef struct {
// Command buffer that VK_RaySceneEnd records its barriers, pipeline bind and dispatch into
VkCommandBuffer cmdbuf;

// Destination image that the ray tracing compute shader writes to
struct {
// Bound as the storage image descriptor (VK_IMAGE_LAYOUT_GENERAL)
VkImageView image_view;
// Used for the image memory barrier issued before the dispatch
VkImage image;
// Size in pixels; determines the number of dispatched workgroups
uint32_t width, height;
} dst;

// Uniform buffer range holding the inverse projection and inverse view matrices.
// VK_RaySceneEnd asserts that size == sizeof(float) * 16 * 2 (two 4x4 float matrices);
// offset is in bytes from the start of buffer.
struct {
VkBuffer buffer;
uint32_t offset;
uint32_t size;
} ubo;
} vk_ray_scene_render_args_t;
// Finishes the ray traced scene: uploads BLAS instance references, builds the TLAS, updates the
// descriptor set (destination image, TLAS, matrices UBO) and dispatches the compute shader over
// args->dst.width x args->dst.height pixels. All commands are recorded into args->cmdbuf.
// Asserts that RTX is enabled and that args->ubo.size covers exactly two 4x4 float matrices.
void VK_RaySceneEnd(const vk_ray_scene_render_args_t* args);
qboolean VK_RayInit( void );
void VK_RayShutdown( void );

View File

@ -548,6 +548,7 @@ static void setupCamera( const ref_viewpass_t *rvp, matrix4x4 mvp )
{0, 0, .5, 1}
};
Matrix4x4_Concat( mvp, vk_proj_fixup, g_camera.worldviewProjectionMatrix);
Matrix4x4_Concat( g_camera.projectionMatrixVk, vk_proj_fixup, g_camera.projectionMatrix);
}
}
@ -632,6 +633,9 @@ void VK_SceneRender( const ref_viewpass_t *rvp )
setupCamera( rvp, mvp );
VK_RenderStateSetProjectionMatrix(g_camera.projectionMatrixVk);
VK_RenderStateSetViewMatrix(g_camera.modelviewMatrix);
VK_RenderDebugLabelBegin( "opaque" );
// Draw view model