rtx: blindly add tlas and compute rendering

it's unknown whether it works at all
no real ray tracing yet
Ivan Avdeev 2021-03-01 10:52:08 -08:00
parent c77f39e353
commit 5e3010489b
8 changed files with 327 additions and 51 deletions


@@ -7,7 +7,7 @@ build() {
glslc -o "build-debug-amd64/lib/xash3d/valve/$NAME.spv" "ref_vk/shaders/$NAME"
}
for s in 2d.frag 2d.vert brush.vert brush.frag
for s in 2d.frag 2d.vert brush.vert brush.frag rtx.comp
do
build "$s"
done

ref_vk/shaders/rtx.comp (new file)

@@ -0,0 +1,22 @@
#version 460
#extension GL_EXT_ray_query : require
// FIXME shader specialization
layout(local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
layout(binding = 0, set = 0, rgba8) uniform image2D image;
layout(binding = 1, set = 0) uniform accelerationStructureEXT tlas;
//layout(binding = 2, set = 0) buffer Vertices { vec4 vertices[]; };
/*
layout (push_constant) uniform PC {
};
*/
void main() {
vec2 res = imageSize(image);
vec2 uv = gl_GlobalInvocationID.xy / res * .5 - .5;
imageStore(image, ivec2(gl_GlobalInvocationID.xy), vec4(res, 0., 1.));
}
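Note: the shader above enables GL_EXT_ray_query but traces nothing yet; uv is computed but unused, and the image is filled with a debug value derived from the image size. Below is a minimal sketch, not part of this commit, of how the bound tlas could eventually be queried from this compute shader; the hardcoded origin/dir camera is a placeholder assumption (a real pass would feed view/projection data via push constants).

#version 460
#extension GL_EXT_ray_query : require
layout(local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
layout(binding = 0, set = 0, rgba8) uniform image2D image;
layout(binding = 1, set = 0) uniform accelerationStructureEXT tlas;
void main() {
	const vec2 res = imageSize(image);
	const vec2 uv = (vec2(gl_GlobalInvocationID.xy) + .5) / res * 2. - 1.;
	// Placeholder camera: fixed origin, pinhole direction from pixel coordinates
	const vec3 origin = vec3(0.);
	const vec3 dir = normalize(vec3(uv, 1.));
	vec3 color = vec3(0.);
	rayQueryEXT rq;
	rayQueryInitializeEXT(rq, tlas, gl_RayFlagsOpaqueEXT, 0xff, origin, 0., dir, 1e4);
	while (rayQueryProceedEXT(rq)) {} // walk the query to completion (geometry treated as opaque)
	if (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionTriangleEXT)
		color = vec3(fract(rayQueryGetIntersectionTEXT(rq, true) * .01)); // visualize hit distance
	imageStore(image, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.));
}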


@@ -199,6 +199,9 @@ const char *resultName(VkResult result);
X(vkCmdUpdateBuffer) \
X(vkCmdBindIndexBuffer) \
X(vkCmdDrawIndexed) \
X(vkCmdPushConstants) \
X(vkCreateComputePipelines) \
X(vkCmdDispatch) \
#define DEVICE_FUNCS_RTX(X) \
X(vkGetAccelerationStructureBuildSizesKHR) \
@@ -206,6 +209,7 @@ const char *resultName(VkResult result);
X(vkGetBufferDeviceAddress) \
X(vkCmdBuildAccelerationStructuresKHR) \
X(vkDestroyAccelerationStructureKHR) \
X(vkGetAccelerationStructureDeviceAddressKHR) \
#define X(f) extern PFN_##f f;
DEVICE_FUNCS(X)


@@ -179,7 +179,7 @@ static qboolean createSwapchain( void )
create_info->imageExtent.width = vk_frame.surface_caps.currentExtent.width;
create_info->imageExtent.height = vk_frame.surface_caps.currentExtent.height;
create_info->imageArrayLayers = 1;
create_info->imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
create_info->imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | (vk_core.rtx ? VK_IMAGE_USAGE_STORAGE_BIT : 0);
create_info->imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
create_info->preTransform = vk_frame.surface_caps.currentTransform;
create_info->compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
@@ -328,25 +328,6 @@ void R_EndFrame( void )
{.depthStencil = {1., 0.}} // TODO reverse-z
};
VkPipelineStageFlags stageflags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo subinfo = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = NULL,
.commandBufferCount = 1,
.pCommandBuffers = &vk_core.cb,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &g_frame.image_available,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &g_frame.done,
.pWaitDstStageMask = &stageflags,
};
VkPresentInfoKHR presinfo = {
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pSwapchains = &vk_frame.swapchain,
.pImageIndices = &g_frame.swapchain_image_index,
.swapchainCount = 1,
.pWaitSemaphores = &g_frame.done,
.waitSemaphoreCount = 1,
};
{
VkCommandBufferBeginInfo beginfo = {
@@ -357,7 +338,7 @@ void R_EndFrame( void )
}
if (vk_core.rtx)
VK_RenderEndRTX( vk_core.cb );
VK_RenderEndRTX( vk_core.cb, vk_frame.image_views[g_frame.swapchain_image_index], vk_frame.create_info.imageExtent.width, vk_frame.create_info.imageExtent.height );
{
VkRenderPassBeginInfo rpbi = {
@@ -393,9 +374,31 @@ void R_EndFrame( void )
vkCmdEndRenderPass(vk_core.cb);
XVK_CHECK(vkEndCommandBuffer(vk_core.cb));
XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, g_frame.fence));
{
const VkSubmitInfo subinfo = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = NULL,
.commandBufferCount = 1,
.pCommandBuffers = &vk_core.cb,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &g_frame.image_available,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &g_frame.done,
.pWaitDstStageMask = &stageflags,
};
XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, g_frame.fence));
}
{
const VkPresentInfoKHR presinfo = {
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pSwapchains = &vk_frame.swapchain,
.pImageIndices = &g_frame.swapchain_image_index,
.swapchainCount = 1,
.pWaitSemaphores = &g_frame.done,
.waitSemaphoreCount = 1,
};
const VkResult present_result = vkQueuePresentKHR(vk_core.queue, &presinfo);
switch (present_result)
{


@@ -600,7 +600,7 @@ void VK_RenderDebugLabelEnd( void )
/* vkCmdEndDebugUtilsLabelEXT(vk_core.cb); */
}
void VK_RenderEndRTX( VkCommandBuffer cmdbuf )
void VK_RenderEndRTX( VkCommandBuffer cmdbuf, VkImageView img_dst, uint32_t w, uint32_t h )
{
ASSERT(vk_core.rtx);
VK_RaySceneBegin();
@@ -625,5 +625,5 @@ void VK_RenderEndRTX( VkCommandBuffer cmdbuf )
VK_RayScenePushModel(cmdbuf, &ray_model_args);
}
VK_RaySceneEnd( cmdbuf );
VK_RaySceneEnd( cmdbuf, img_dst, w, h );
}


@@ -64,7 +64,7 @@ typedef struct render_draw_s {
void VK_RenderBegin( void );
void VK_RenderScheduleDraw( const render_draw_t *draw );
void VK_RenderEnd( VkCommandBuffer cmdbuf );
void VK_RenderEndRTX( VkCommandBuffer cmdbuf );
void VK_RenderEndRTX( VkCommandBuffer cmdbuf, VkImageView img_dst, uint32_t w, uint32_t h );
void VK_RenderDebugLabelBegin( const char *label );
void VK_RenderDebugLabelEnd( void );


@@ -11,6 +11,10 @@
#define MAX_SCRATCH_BUFFER (16*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
// TODO sync with shaders
#define WG_W 16
#define WG_H 8
/*
typedef struct {
//int lightmap, texture;
@@ -22,16 +26,20 @@ typedef struct {
*/
static struct {
/* VkPipelineLayout pipeline_layout; */
/* VkPipeline rtx_compute_pipeline; */
/* VkDescriptorPool desc_pool; */
/* VkDescriptorSet desc_set; */
VkPipelineLayout pipeline_layout;
VkPipeline pipeline;
VkDescriptorSetLayout desc_layout;
VkDescriptorPool desc_pool;
VkDescriptorSet desc_set;
vk_buffer_t accels_buffer;
vk_buffer_t scratch_buffer;
VkDeviceAddress accels_buffer_addr, scratch_buffer_addr;
vk_buffer_t tlas_geom_buffer;
VkAccelerationStructureKHR accels[MAX_ACCELS];
VkAccelerationStructureKHR tlas;
} g_rtx;
static struct {
@@ -44,7 +52,17 @@ static VkDeviceAddress getBufferDeviceAddress(VkBuffer buffer) {
return vkGetBufferDeviceAddress(vk_core.device, &bdai);
}
static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, const VkAccelerationStructureGeometryKHR *geoms, const uint32_t *max_prim_counts, const VkAccelerationStructureBuildRangeInfoKHR **build_ranges, uint32_t n_geoms, VkAccelerationStructureTypeKHR type) {
static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
VkAccelerationStructureDeviceAddressInfoKHR asdai = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
.accelerationStructure = as,
};
return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai);
}
static VkAccelerationStructureKHR createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, const VkAccelerationStructureGeometryKHR *geoms, const uint32_t *max_prim_counts, const VkAccelerationStructureBuildRangeInfoKHR **build_ranges, uint32_t n_geoms, VkAccelerationStructureTypeKHR type) {
VkAccelerationStructureKHR accel;
VkAccelerationStructureBuildGeometryInfoKHR build_info = {
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR,
.type = type,
@@ -64,15 +82,10 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons
.type = type,
};
VkAccelerationStructureKHR *handle = g_rtx.accels + g_rtx_scene.num_accels;
if (g_rtx_scene.num_accels > ARRAYSIZE(g_rtx.accels)) {
gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n");
return false;
}
vkGetAccelerationStructureBuildSizesKHR(
vk_core.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &build_info, max_prim_counts, &build_size);
if (0)
{
uint32_t max_prims = 0;
for (int i = 0; i < n_geoms; ++i)
@@ -85,30 +98,28 @@ static qboolean createAndBuildAccelerationStructure(VkCommandBuffer cmdbuf, cons
gEngine.Con_Printf(S_ERROR "Scratch buffer overflow: left %u bytes, but need %u\n",
MAX_SCRATCH_BUFFER - g_rtx_scene.scratch_offset,
build_size.buildScratchSize);
return false;
return VK_NULL_HANDLE;
}
if (MAX_ACCELS_BUFFER - g_rtx_scene.buffer_offset < build_size.accelerationStructureSize) {
gEngine.Con_Printf(S_ERROR "Accels buffer overflow: left %u bytes, but need %u\n",
MAX_ACCELS_BUFFER - g_rtx_scene.buffer_offset,
build_size.accelerationStructureSize);
return false;
return VK_NULL_HANDLE;
}
asci.size = build_size.accelerationStructureSize;
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, handle));
XVK_CHECK(vkCreateAccelerationStructureKHR(vk_core.device, &asci, NULL, &accel));
// TODO alignment?
g_rtx_scene.buffer_offset += build_size.accelerationStructureSize;
g_rtx_scene.buffer_offset = (g_rtx_scene.buffer_offset + 255) & ~255; // Buffer must be aligned to 256 according to spec
g_rtx_scene.num_accels++;
build_info.dstAccelerationStructure = *handle;
build_info.dstAccelerationStructure = accel;
build_info.scratchData.deviceAddress = g_rtx.scratch_buffer_addr + g_rtx_scene.scratch_offset;
g_rtx_scene.scratch_offset += build_size.buildScratchSize;
vkCmdBuildAccelerationStructuresKHR(cmdbuf, 1, &build_info, build_ranges);
return true;
return accel;
}
void VK_RaySceneBegin( void )
@@ -122,6 +133,8 @@ void VK_RaySceneBegin( void )
if (g_rtx.accels[i] != VK_NULL_HANDLE)
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.accels[i], NULL);
}
vkDestroyAccelerationStructureKHR(vk_core.device, g_rtx.tlas, NULL);
g_rtx_scene.num_accels = 0;
}
@@ -133,6 +146,12 @@ static vk_ray_model_t *getModelByHandle(vk_ray_model_handle_t handle)
void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t *model) // _handle_t model_handle )
{
VkAccelerationStructureKHR *handle = g_rtx.accels + g_rtx_scene.num_accels;
if (g_rtx_scene.num_accels >= ARRAYSIZE(g_rtx.accels)) {
gEngine.Con_Printf(S_ERROR "Ran out of AccelerationStructure slots\n");
return;
}
ASSERT(vk_core.rtx);
{
@@ -162,19 +181,229 @@ void VK_RayScenePushModel( VkCommandBuffer cmdbuf, const vk_ray_model_create_t *
};
const VkAccelerationStructureBuildRangeInfoKHR* build_ranges[ARRAYSIZE(geom)] = { &build_range_tri };
createAndBuildAccelerationStructure(cmdbuf,
*handle = createAndBuildAccelerationStructure(cmdbuf,
geom, max_prim_counts, build_ranges, ARRAYSIZE(geom), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
if (!*handle) {
gEngine.Con_Printf(S_ERROR "Error building BLAS\n");
return;
}
g_rtx_scene.num_accels++;
}
}
void VK_RaySceneEnd( VkCommandBuffer cmdbuf )
void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst, uint32_t w, uint32_t h )
{
ASSERT(vk_core.rtx);
// 1. Barrier for building all BLASes
// Upload all BLAS instance references to GPU memory
{
VkAccelerationStructureInstanceKHR *inst = g_rtx.tlas_geom_buffer.mapped;
for (int i = 0; i < g_rtx_scene.num_accels; ++i) {
inst[i] = (VkAccelerationStructureInstanceKHR){
.transform = (VkTransformMatrixKHR){1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0},
.instanceCustomIndex = 0,
.mask = 0xff,
.instanceShaderBindingTableRecordOffset = 0,
.flags = 0,
.accelerationStructureReference = getASAddress(g_rtx.accels[i]),
};
}
}
// Barrier for building all BLASes
// BLAS builds were recorded into cmdbuf above; synchronize before the TLAS build reads their results
{
VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR | VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR,
.buffer = g_rtx.accels_buffer.buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
}};
vkCmdPipelineBarrier(cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
// 2. Create TLAS
{
const VkAccelerationStructureGeometryKHR tl_geom[] = {
{
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
//.flags = VK_GEOMETRY_OPAQUE_BIT,
.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR,
.geometry.instances =
(VkAccelerationStructureGeometryInstancesDataKHR){
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR,
.data.deviceAddress = getBufferDeviceAddress(g_rtx.tlas_geom_buffer.buffer),
.arrayOfPointers = VK_FALSE,
},
},
};
const uint32_t tl_max_prim_counts[ARRAYSIZE(tl_geom)] = {g_rtx_scene.num_accels};
const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = {
.primitiveCount = g_rtx_scene.num_accels,
};
const VkAccelerationStructureBuildRangeInfoKHR *tl_build_ranges[] = {&tl_build_range};
g_rtx.tlas = createAndBuildAccelerationStructure(cmdbuf,
tl_geom, tl_max_prim_counts, tl_build_ranges, ARRAYSIZE(tl_geom), VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
}
// 3. Update descriptor sets (bind dest image, tlas, projection matrix)
{
const VkDescriptorImageInfo dii = {
.sampler = VK_NULL_HANDLE,
.imageView = img_dst,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
/*
const VkDescriptorBufferInfo dbi = {
.buffer = g_rtx.tri_buf.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
*/
const VkWriteDescriptorSetAccelerationStructureKHR wdsas = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
.accelerationStructureCount = 1,
.pAccelerationStructures = &g_rtx.tlas,
};
const VkWriteDescriptorSet wds[] = {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dstSet = g_rtx.desc_set,
.dstBinding = 0,
.dstArrayElement = 0,
.pImageInfo = &dii,
},
/*
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.dstSet = g_rtx.desc_set,
.dstBinding = 2,
.dstArrayElement = 0,
.pBufferInfo = &dbi,
},*/
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
.dstSet = g_rtx.desc_set,
.dstBinding = 1,
.dstArrayElement = 0,
.pNext = &wdsas,
},
};
vkUpdateDescriptorSets(vk_core.device, ARRAYSIZE(wds), wds, 0, NULL);
}
// 4. Barrier for TLAS build
{
VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.buffer = g_rtx.accels_buffer.buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
}};
vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL);
}
// 5. dispatch compute
// 6. blit to swapchain image // TODO is it more efficient to draw to it as a texture?
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, g_rtx.pipeline);
//vkCmdPushConstants(cmdbuf, g_rtx.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(matrix4x4), mvp);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, g_rtx.pipeline_layout, 0, 1, &g_rtx.desc_set, 0, NULL);
vkCmdDispatch(cmdbuf, (w+WG_W-1)/WG_W, (h+WG_H-1)/WG_H, 1);
}
static void createLayouts( void ) {
VkDescriptorSetLayoutBinding bindings[] = {{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
}, {
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
/* }, { */ /* .binding = 2, */
/* .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, */
/* .descriptorCount = 1, */
/* .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, */
},
};
VkDescriptorSetLayoutCreateInfo dslci = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = ARRAYSIZE(bindings), .pBindings = bindings, };
XVK_CHECK(vkCreateDescriptorSetLayout(vk_core.device, &dslci, NULL, &g_rtx.desc_layout));
/* VkPushConstantRange push_const = {0}; */
/* push_const.offset = 0; */
/* push_const.size = sizeof(matrix4x4); */
/* push_const.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; */
{
VkPipelineLayoutCreateInfo plci = {0};
plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
plci.setLayoutCount = 1;
plci.pSetLayouts = &g_rtx.desc_layout;
//plci.pushConstantRangeCount = 1;
//plci.pPushConstantRanges = &push_const;
XVK_CHECK(vkCreatePipelineLayout(vk_core.device, &plci, NULL, &g_rtx.pipeline_layout));
}
{
VkDescriptorPoolSize pools[] = {
{.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, .descriptorCount = 1},
//{.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1},
{.type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, .descriptorCount = 1},
};
VkDescriptorPoolCreateInfo dpci = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.maxSets = 1, .poolSizeCount = ARRAYSIZE(pools), .pPoolSizes = pools,
};
XVK_CHECK(vkCreateDescriptorPool(vk_core.device, &dpci, NULL, &g_rtx.desc_pool));
}
{
VkDescriptorSetAllocateInfo dsai = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = g_rtx.desc_pool,
.descriptorSetCount = 1,
.pSetLayouts = &g_rtx.desc_layout,
};
XVK_CHECK(vkAllocateDescriptorSets(vk_core.device, &dsai, &g_rtx.desc_set));
}
}
static void createPipeline( void ) {
VkComputePipelineCreateInfo cpci = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.layout = g_rtx.pipeline_layout,
.stage = (VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = loadShader("rtx.comp.spv"),
.pName = "main",
},
};
XVK_CHECK(vkCreateComputePipelines(vk_core.device, VK_NULL_HANDLE, 1, &cpci, NULL, &g_rtx.pipeline));
}
qboolean VK_RayInit( void )
@@ -198,15 +427,33 @@ qboolean VK_RayInit( void )
}
g_rtx.scratch_buffer_addr = getBufferDeviceAddress(g_rtx.scratch_buffer.buffer);
if (!createBuffer(&g_rtx.tlas_geom_buffer, sizeof(VkAccelerationStructureInstanceKHR) * MAX_ACCELS,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR,
/* TODO DEVICE_LOCAL */ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// FIXME complain, handle
return false;
}
createLayouts();
createPipeline();
return true;
}
void VK_RayShutdown( void )
{
ASSERT(vk_core.rtx);
destroyBuffer(&g_rtx.scratch_buffer);
destroyBuffer(&g_rtx.accels_buffer);
// TODO dealloc all ASes
vkDestroyPipeline(vk_core.device, g_rtx.pipeline, NULL);
vkDestroyDescriptorPool(vk_core.device, g_rtx.desc_pool, NULL);
vkDestroyPipelineLayout(vk_core.device, g_rtx.pipeline_layout, NULL);
vkDestroyDescriptorSetLayout(vk_core.device, g_rtx.desc_layout, NULL);
destroyBuffer(&g_rtx.scratch_buffer);
destroyBuffer(&g_rtx.accels_buffer);
destroyBuffer(&g_rtx.tlas_geom_buffer);
}


@@ -18,7 +18,7 @@ vk_ray_model_handle_t VK_RayModelCreate( const vk_ray_model_create_t *args );
void VK_RaySceneBegin( void );
void VK_RayScenePushModel(VkCommandBuffer cmdbuf, const vk_ray_model_create_t* model); // vk_ray_model_handle_t model );
void VK_RaySceneEnd( VkCommandBuffer cmdbuf );
void VK_RaySceneEnd( VkCommandBuffer cmdbuf, VkImageView img_dst, uint32_t w, uint32_t h );
qboolean VK_RayInit( void );
void VK_RayShutdown( void );