vk: profiler: cover staging with combuf scopes; add build_as time
This commit is contained in:
parent
f6201e460f
commit
92ce698292
|
@ -26,4 +26,46 @@
|
|||
- **(ref) R_AllowFog(true)**
|
||||
- **(ref) R_EndFrame()**
|
||||
|
||||
# Staging and multiple command buffers
|
||||
We want to get rid of the extra command buffers used for staging (and for building BLASes). That would mean tying any command-buffer-related work there to framectl.
|
||||
However, there are several staging cmdbuf usages that are technically out-of-band with respect to framectl:
|
||||
0. Staging data can get full, which requires sync flush: filling cmdbuf outside of frame (or while still building a frame), submitting it and waiting on it.
|
||||
1. Texture uploading. There's an explicit usage of staging cmdbuf in vk_texture to do layout transfer. This layout transfer can be moved to staging itself.
|
||||
2. BLAS building. Creating a ray model uploads its geometry via staging and then immediately builds its BLAS on the same staging cmdbuf. Ideally(?), we'd like to split BLAS building to some later stage to do it in bulk.
|
||||
|
||||
# OpenGL-like immediate mode rendering, ~TriApi
|
||||
## Functions:
|
||||
R_Set2DMode(bool) -- switches between 3D scene and 2D overlay modes; used in engine
|
||||
R_DrawStretchRaw,
|
||||
R_DrawStretchPic,
|
||||
R_DrawTileClear,
|
||||
CL_FillRGBA,
|
||||
CL_FillRGBABlend,
|
||||
|
||||
R_AllowFog,
|
||||
GL_SetRenderMode,
|
||||
|
||||
void (*GL_Bind)( int tmu, unsigned int texnum );
|
||||
void (*GL_SelectTexture)( int tmu );
|
||||
|
||||
void (*GL_LoadTextureMatrix)( const float *glmatrix ); -- exported to the game, not used in engine
|
||||
void (*GL_TexMatrixIdentity)( void ); -- exported to the game, not used in engine
|
||||
|
||||
void (*GL_CleanUpTextureUnits)( int last ); // pass 0 for clear all the texture units
|
||||
void (*GL_TexGen)( unsigned int coord, unsigned int mode );
|
||||
void (*GL_TextureTarget)( unsigned int target ); // change texture unit mode without bind texture
|
||||
void (*GL_TexCoordArrayMode)( unsigned int texmode );
|
||||
void (*GL_UpdateTexSize)( int texnum, int width, int height, int depth ); // recalc statistics
|
||||
|
||||
TriRenderMode,
|
||||
TriBegin,
|
||||
TriEnd,
|
||||
TriColor4f,
|
||||
TriColor4ub,
|
||||
TriTexCoord2f,
|
||||
TriVertex3fv,
|
||||
TriVertex3f,
|
||||
TriFog,
|
||||
TriGetMatrix,
|
||||
TriFogParams,
|
||||
TriCullFace,
|
||||
|
|
|
@ -345,7 +345,7 @@ static int drawGraph( r_speeds_graph_t *const graph, int frame_bar_y ) {
|
|||
return frame_bar_y;
|
||||
}
|
||||
|
||||
static int drawFrames( int draw, uint32_t prev_frame_index, int y, const vk_combuf_scopes_t *gpurofl) {
|
||||
static int drawFrames( int draw, uint32_t prev_frame_index, int y, const vk_combuf_scopes_t *gpurofls, int gpurofls_count) {
|
||||
// Draw latest 2 frames; find their boundaries
|
||||
uint32_t rewind_frame = prev_frame_index;
|
||||
const int max_frames_to_draw = 2;
|
||||
|
@ -378,22 +378,25 @@ static int drawFrames( int draw, uint32_t prev_frame_index, int y, const vk_comb
|
|||
y += g_speeds.font_metrics.glyph_height * 6;
|
||||
const int bar_height = g_speeds.font_metrics.glyph_height;
|
||||
|
||||
for (int i = 0; i < gpurofl->entries_count; ++i) {
|
||||
const int scope_index = gpurofl->entries[i];
|
||||
const uint64_t begin_ns = gpurofl->timestamps[scope_index*2 + 0];
|
||||
const uint64_t end_ns = gpurofl->timestamps[scope_index*2 + 1];
|
||||
const char *name = gpurofl->scopes[scope_index].name;
|
||||
for (int j = 0; j < gpurofls_count; ++j) {
|
||||
const vk_combuf_scopes_t *const gpurofl = gpurofls + j;
|
||||
for (int i = 0; i < gpurofl->entries_count; ++i) {
|
||||
const int scope_index = gpurofl->entries[i];
|
||||
const uint64_t begin_ns = gpurofl->timestamps[i*2 + 0];
|
||||
const uint64_t end_ns = gpurofl->timestamps[i*2 + 1];
|
||||
const char *name = gpurofl->scopes[scope_index].name;
|
||||
|
||||
if (!g_speeds.frame.gpu_scopes[scope_index].initialized) {
|
||||
R_SpeedsRegisterMetric(&g_speeds.frame.gpu_scopes[scope_index].time_us, name, kSpeedsMetricMicroseconds);
|
||||
g_speeds.frame.gpu_scopes[scope_index].initialized = 1;
|
||||
if (!g_speeds.frame.gpu_scopes[scope_index].initialized) {
|
||||
R_SpeedsRegisterMetric(&g_speeds.frame.gpu_scopes[scope_index].time_us, name, kSpeedsMetricMicroseconds);
|
||||
g_speeds.frame.gpu_scopes[scope_index].initialized = 1;
|
||||
}
|
||||
|
||||
g_speeds.frame.gpu_scopes[scope_index].time_us += (end_ns - begin_ns) / 1000;
|
||||
|
||||
rgba_t color = {255, 255, 0, 127};
|
||||
getColorForString(name, color);
|
||||
drawTimeBar(frame_begin_time, time_scale_ms, begin_ns, end_ns, y + i * bar_height, bar_height, name, color);
|
||||
}
|
||||
|
||||
g_speeds.frame.gpu_scopes[scope_index].time_us += (end_ns - begin_ns) / 1000;
|
||||
|
||||
rgba_t color = {255, 255, 0, 127};
|
||||
getColorForString(name, color);
|
||||
drawTimeBar(frame_begin_time, time_scale_ms, begin_ns, end_ns, y + i * bar_height, bar_height, name, color);
|
||||
}
|
||||
}
|
||||
return y;
|
||||
|
@ -586,11 +589,14 @@ void R_SpeedsRegisterMetric(int* p_value, const char *name, r_speeds_metric_type
|
|||
}
|
||||
}
|
||||
|
||||
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl) {
|
||||
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl, int gpurofl_count) {
|
||||
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
|
||||
|
||||
const uint64_t gpu_frame_begin_ns = gpurofl->timestamps[0];
|
||||
const uint64_t gpu_frame_end_ns = gpurofl->timestamps[1];
|
||||
uint64_t gpu_frame_begin_ns = UINT64_MAX, gpu_frame_end_ns = 0;
|
||||
for (int i = 0; i < gpurofl_count; ++i) {
|
||||
gpu_frame_begin_ns = Q_min(gpu_frame_begin_ns, gpurofl[i].timestamps[0]);
|
||||
gpu_frame_end_ns = Q_max(gpu_frame_end_ns, gpurofl[i].timestamps[1]);
|
||||
}
|
||||
|
||||
// Reads current font/DPI scale, many functions below use it
|
||||
getCurrentFontMetrics();
|
||||
|
@ -614,7 +620,7 @@ void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scope
|
|||
{
|
||||
int y = 100;
|
||||
const int draw = speeds_bits & SPEEDS_BIT_FRAME;
|
||||
y = drawFrames( draw, prev_frame_index, y, gpurofl );
|
||||
y = drawFrames( draw, prev_frame_index, y, gpurofl, gpurofl_count );
|
||||
|
||||
if (draw)
|
||||
y = drawGraphs(y + 10);
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
void R_SpeedsInit( void );
|
||||
|
||||
struct vk_combuf_scopes_s;
|
||||
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl);
|
||||
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl, int gpurofl_count);
|
||||
|
||||
// Called from the engine into ref_api to get the latest speeds info
|
||||
qboolean R_SpeedsMessage( char *out, size_t size );
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "profiler.h"
|
||||
|
||||
#define MAX_COMMANDBUFFERS 4
|
||||
#define MAX_COMMANDBUFFERS 6
|
||||
#define MAX_QUERY_COUNT 128
|
||||
|
||||
#define BEGIN_INDEX_TAG 0x10000000
|
||||
|
|
|
@ -43,6 +43,8 @@ typedef struct {
|
|||
// Unfortunately waiting on semaphore also means resetting it when it is signaled
|
||||
// so we can't reuse the same one for two purposes and need to multiply entities (i.e. add yet another semaphore)
|
||||
VkSemaphore sem_done2;
|
||||
|
||||
vk_combuf_t *staging_combuf;
|
||||
} vk_framectl_frame_t;
|
||||
|
||||
static struct {
|
||||
|
@ -208,8 +210,6 @@ void R_BeginFrame( qboolean clearScene ) {
|
|||
waitForFrameFence();
|
||||
// Current command buffer is done and available
|
||||
// Previous might still be in flight
|
||||
|
||||
// TODO R_VkQueryPoolGetFrameResults(g_frame.qpools + g_frame.current.index);
|
||||
}
|
||||
|
||||
APROF_SCOPE_END(begin_frame_tail);
|
||||
|
@ -220,8 +220,12 @@ void R_BeginFrame( qboolean clearScene ) {
|
|||
APROF_SCOPE_BEGIN(begin_frame);
|
||||
|
||||
{
|
||||
const vk_combuf_scopes_t gpurofl = R_VkCombufScopesGet(frame->combuf);
|
||||
R_SpeedsDisplayMore(prev_frame_event_index, &gpurofl);
|
||||
const vk_combuf_scopes_t gpurofl[] = {
|
||||
frame->staging_combuf ? R_VkCombufScopesGet(frame->staging_combuf) : (vk_combuf_scopes_t){},
|
||||
R_VkCombufScopesGet(frame->combuf),
|
||||
};
|
||||
|
||||
R_SpeedsDisplayMore(prev_frame_event_index, frame->staging_combuf ? gpurofl : gpurofl + 1, frame->staging_combuf ? 2 : 1);
|
||||
}
|
||||
|
||||
if (vk_core.rtx && FBitSet( vk_rtx->flags, FCVAR_CHANGED )) {
|
||||
|
@ -304,6 +308,7 @@ static void enqueueRendering( vk_combuf_t* combuf ) {
|
|||
g_frame.current.phase = Phase_RenderingEnqueued;
|
||||
}
|
||||
|
||||
// FIXME pass frame, not combuf (possible desync)
|
||||
static void submit( vk_combuf_t* combuf, qboolean wait ) {
|
||||
ASSERT(g_frame.current.phase == Phase_RenderingEnqueued);
|
||||
|
||||
|
@ -314,8 +319,10 @@ static void submit( vk_combuf_t* combuf, qboolean wait ) {
|
|||
|
||||
R_VkCombufEnd(combuf);
|
||||
|
||||
frame->staging_combuf = R_VkStagingFrameEnd();
|
||||
|
||||
const VkCommandBuffer cmdbufs[] = {
|
||||
R_VkStagingFrameEnd(),
|
||||
frame->staging_combuf ? frame->staging_combuf->cmdbuf : NULL,
|
||||
cmdbuf,
|
||||
};
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "vk_rtx.h"
|
||||
#include "vk_ray_internal.h"
|
||||
#include "r_speeds.h"
|
||||
#include "vk_combuf.h"
|
||||
|
||||
#define MAX_SCRATCH_BUFFER (32*1024*1024)
|
||||
#define MAX_ACCELS_BUFFER (64*1024*1024)
|
||||
|
@ -26,7 +27,7 @@ static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
|
|||
}
|
||||
|
||||
// TODO split this into smaller building blocks in a separate module
|
||||
qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_build_args_t *args, vk_ray_model_t *model) {
|
||||
qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args, vk_ray_model_t *model) {
|
||||
qboolean should_create = *args->p_accel == VK_NULL_HANDLE;
|
||||
#if 1 // update does not work at all on AMD gpus
|
||||
qboolean is_update = false; // FIXME this crashes for some reason !should_create && args->dynamic;
|
||||
|
@ -105,7 +106,7 @@ qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_bu
|
|||
}
|
||||
|
||||
// If not enough data for building, just create
|
||||
if (!cmdbuf || !args->build_ranges)
|
||||
if (!combuf || !args->build_ranges)
|
||||
return true;
|
||||
|
||||
if (model) {
|
||||
|
@ -121,11 +122,17 @@ qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_bu
|
|||
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
|
||||
|
||||
g_accel_.stats.accels_built++;
|
||||
vkCmdBuildAccelerationStructuresKHR(cmdbuf, 1, &build_info, &args->build_ranges);
|
||||
|
||||
static int scope_id = -2;
|
||||
if (scope_id == -2)
|
||||
scope_id = R_VkGpuScope_Register("build_as");
|
||||
const int begin_index = R_VkCombufScopeBegin(combuf, scope_id);
|
||||
vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, &build_info, &args->build_ranges);
|
||||
R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void createTlas( VkCommandBuffer cmdbuf, VkDeviceAddress instances_addr ) {
|
||||
static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
|
||||
const VkAccelerationStructureGeometryKHR tl_geom[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
|
||||
|
@ -146,7 +153,7 @@ static void createTlas( VkCommandBuffer cmdbuf, VkDeviceAddress instances_addr )
|
|||
const as_build_args_t asrgs = {
|
||||
.geoms = tl_geom,
|
||||
.max_prim_counts = tl_max_prim_counts,
|
||||
.build_ranges = cmdbuf == VK_NULL_HANDLE ? NULL : &tl_build_range,
|
||||
.build_ranges = !combuf ? NULL : &tl_build_range,
|
||||
.n_geoms = COUNTOF(tl_geom),
|
||||
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
|
||||
// we can't really rebuild TLAS because instance count changes are not allowed .dynamic = true,
|
||||
|
@ -154,15 +161,15 @@ static void createTlas( VkCommandBuffer cmdbuf, VkDeviceAddress instances_addr )
|
|||
.p_accel = &g_accel.tlas,
|
||||
.debug_name = "TLAS",
|
||||
};
|
||||
if (!createOrUpdateAccelerationStructure(cmdbuf, &asrgs, NULL)) {
|
||||
if (!createOrUpdateAccelerationStructure(combuf, &asrgs, NULL)) {
|
||||
gEngine.Host_Error("Could not create/update TLAS\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf) {
|
||||
void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
|
||||
ASSERT(g_ray_model_state.frame.num_models > 0);
|
||||
DEBUG_BEGIN(cmdbuf, "prepare tlas");
|
||||
DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas");
|
||||
|
||||
R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc );
|
||||
|
||||
|
@ -220,15 +227,15 @@ void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf) {
|
|||
.offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR),
|
||||
.size = g_ray_model_state.frame.num_models * sizeof(VkAccelerationStructureInstanceKHR),
|
||||
} };
|
||||
vkCmdPipelineBarrier(cmdbuf,
|
||||
vkCmdPipelineBarrier(combuf->cmdbuf,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL);
|
||||
}
|
||||
|
||||
// 2. Build TLAS
|
||||
createTlas(cmdbuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
|
||||
DEBUG_END(cmdbuf);
|
||||
createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
|
||||
DEBUG_END(combuf->cmdbuf);
|
||||
}
|
||||
|
||||
qboolean RT_VkAccelInit(void) {
|
||||
|
|
|
@ -41,4 +41,5 @@ qboolean RT_VkAccelInit(void);
|
|||
void RT_VkAccelShutdown(void);
|
||||
void RT_VkAccelNewMap(void);
|
||||
void RT_VkAccelFrameBegin(void);
|
||||
void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf);
|
||||
struct vk_combuf_s;
|
||||
void RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf);
|
||||
|
|
|
@ -51,7 +51,8 @@ typedef struct {
|
|||
qboolean dynamic;
|
||||
} as_build_args_t;
|
||||
|
||||
qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_build_args_t *args, vk_ray_model_t *model);
|
||||
struct vk_combuf_s;
|
||||
qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args, vk_ray_model_t *model);
|
||||
|
||||
typedef struct {
|
||||
// Geometry metadata. Lifetime is similar to geometry lifetime itself.
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "vk_staging.h"
|
||||
#include "vk_light.h"
|
||||
#include "vk_math.h"
|
||||
#include "vk_combuf.h"
|
||||
|
||||
#include "eiface.h"
|
||||
#include "xash3d_mathlib.h"
|
||||
|
@ -324,8 +325,8 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
|
|||
|
||||
R_VkStagingUnlock(kusok_staging.handle);
|
||||
|
||||
// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
|
||||
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
|
||||
// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
|
||||
vk_combuf_t *const combuf = R_VkStagingCommit();
|
||||
{
|
||||
const VkBufferMemoryBarrier bmb[] = { {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
|
@ -344,7 +345,7 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
|
|||
.offset = staging_args.offset,
|
||||
.size = staging_args.size,
|
||||
} };
|
||||
vkCmdPipelineBarrier(cmdbuf,
|
||||
vkCmdPipelineBarrier(combuf->cmdbuf,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
//VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
|
||||
|
@ -368,9 +369,9 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
|
|||
qboolean result;
|
||||
asrgs.p_accel = &ray_model->as;
|
||||
|
||||
DEBUG_BEGINF(cmdbuf, "build blas for %s", args.model->debug_name);
|
||||
result = createOrUpdateAccelerationStructure(cmdbuf, &asrgs, ray_model);
|
||||
DEBUG_END(cmdbuf);
|
||||
DEBUG_BEGINF(combuf->cmdbuf, "build blas for %s", args.model->debug_name);
|
||||
result = createOrUpdateAccelerationStructure(combuf, &asrgs, ray_model);
|
||||
DEBUG_END(combuf->cmdbuf);
|
||||
|
||||
if (!result)
|
||||
{
|
||||
|
|
|
@ -276,7 +276,7 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a
|
|||
}
|
||||
|
||||
DEBUG_BEGIN(cmdbuf, "yay tracing");
|
||||
RT_VkAccelPrepareTlas(cmdbuf);
|
||||
RT_VkAccelPrepareTlas(combuf);
|
||||
prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);
|
||||
|
||||
// 4. Barrier for TLAS build
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "vk_commandpool.h"
|
||||
#include "profiler.h"
|
||||
#include "r_speeds.h"
|
||||
#include "vk_combuf.h"
|
||||
|
||||
#include <memory.h>
|
||||
|
||||
|
@ -34,8 +35,10 @@ static struct {
|
|||
int count;
|
||||
} images;
|
||||
|
||||
vk_command_pool_t upload_pool;
|
||||
VkCommandBuffer cmdbuf;
|
||||
vk_combuf_t *combuf[3];
|
||||
|
||||
// Currently opened command buffer, ready to accept new commands
|
||||
vk_combuf_t *current;
|
||||
|
||||
struct {
|
||||
int total_size;
|
||||
|
@ -50,7 +53,9 @@ qboolean R_VkStagingInit(void) {
|
|||
if (!VK_BufferCreate("staging", &g_staging.buffer, DEFAULT_STAGING_SIZE, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||
return false;
|
||||
|
||||
g_staging.upload_pool = R_VkCommandPoolCreate( COMMAND_BUFFER_COUNT );
|
||||
g_staging.combuf[0] = R_VkCombufOpen();
|
||||
g_staging.combuf[1] = R_VkCombufOpen();
|
||||
g_staging.combuf[2] = R_VkCombufOpen();
|
||||
|
||||
R_FlippingBuffer_Init(&g_staging.buffer_alloc, DEFAULT_STAGING_SIZE);
|
||||
|
||||
|
@ -66,7 +71,6 @@ qboolean R_VkStagingInit(void) {
|
|||
|
||||
void R_VkStagingShutdown(void) {
|
||||
VK_BufferDestroy(&g_staging.buffer);
|
||||
R_VkCommandPoolDestroy( &g_staging.upload_pool );
|
||||
}
|
||||
|
||||
// FIXME There's a severe race condition here. Submitting things manually and prematurely (before framectl had a chance to synchronize with the previous frame)
|
||||
|
@ -74,12 +78,12 @@ void R_VkStagingShutdown(void) {
|
|||
void R_VkStagingFlushSync( void ) {
|
||||
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
|
||||
|
||||
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
|
||||
if (!cmdbuf)
|
||||
vk_combuf_t *combuf = R_VkStagingCommit();
|
||||
if (!combuf)
|
||||
goto end;
|
||||
|
||||
XVK_CHECK(vkEndCommandBuffer(cmdbuf));
|
||||
g_staging.cmdbuf = VK_NULL_HANDLE;
|
||||
R_VkCombufEnd(combuf);
|
||||
g_staging.current = NULL;
|
||||
|
||||
//gEngine.Con_Reportf(S_WARN "flushing staging buffer img count=%d\n", g_staging.images.count);
|
||||
|
||||
|
@ -87,12 +91,14 @@ void R_VkStagingFlushSync( void ) {
|
|||
const VkSubmitInfo subinfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = &cmdbuf,
|
||||
.pCommandBuffers = &combuf->cmdbuf,
|
||||
};
|
||||
|
||||
// TODO wait for previous command buffer completion. Why: we might end up writing into the same dst
|
||||
|
||||
XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, VK_NULL_HANDLE));
|
||||
|
||||
// TODO wait for fence, not this
|
||||
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
|
||||
}
|
||||
|
||||
|
@ -173,7 +179,11 @@ void R_VkStagingUnlock(staging_handle_t handle) {
|
|||
// FIXME mark and check ready
|
||||
}
|
||||
|
||||
static void commitBuffers(VkCommandBuffer cmdbuf) {
|
||||
static void commitBuffers(void) {
|
||||
const VkCommandBuffer cmdbuf = g_staging.current->cmdbuf;
|
||||
|
||||
// TODO combuf scopes
|
||||
|
||||
// TODO better coalescing:
|
||||
// - upload once per buffer
|
||||
// - join adjacent regions
|
||||
|
@ -214,7 +224,8 @@ static void commitBuffers(VkCommandBuffer cmdbuf) {
|
|||
g_staging.buffers.count = 0;
|
||||
}
|
||||
|
||||
static void commitImages(VkCommandBuffer cmdbuf) {
|
||||
static void commitImages(void) {
|
||||
const VkCommandBuffer cmdbuf = g_staging.current->cmdbuf;
|
||||
for (int i = 0; i < g_staging.images.count; i++) {
|
||||
/* { */
|
||||
/* const VkBufferImageCopy *const copy = g_staging.images.copy + i; */
|
||||
|
@ -233,29 +244,27 @@ static void commitImages(VkCommandBuffer cmdbuf) {
|
|||
g_staging.images.count = 0;
|
||||
}
|
||||
|
||||
VkCommandBuffer R_VkStagingGetCommandBuffer(void) {
|
||||
if (g_staging.cmdbuf)
|
||||
return g_staging.cmdbuf;
|
||||
static vk_combuf_t *getCurrentCombuf(void) {
|
||||
if (!g_staging.current) {
|
||||
g_staging.current = g_staging.combuf[0];
|
||||
R_VkCombufBegin(g_staging.current);
|
||||
}
|
||||
|
||||
g_staging.cmdbuf = g_staging.upload_pool.buffers[0];
|
||||
|
||||
const VkCommandBufferBeginInfo beginfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
};
|
||||
XVK_CHECK(vkBeginCommandBuffer(g_staging.cmdbuf, &beginfo));
|
||||
|
||||
return g_staging.cmdbuf;
|
||||
return g_staging.current;
|
||||
}
|
||||
|
||||
VkCommandBuffer R_VkStagingCommit(void) {
|
||||
if (!g_staging.images.count && !g_staging.buffers.count && !g_staging.cmdbuf)
|
||||
VkCommandBuffer R_VkStagingGetCommandBuffer(void) {
|
||||
return getCurrentCombuf()->cmdbuf;
|
||||
}
|
||||
|
||||
vk_combuf_t *R_VkStagingCommit(void) {
|
||||
if (!g_staging.images.count && !g_staging.buffers.count && !g_staging.current)
|
||||
return VK_NULL_HANDLE;
|
||||
|
||||
const VkCommandBuffer cmdbuf = R_VkStagingGetCommandBuffer();
|
||||
commitBuffers(cmdbuf);
|
||||
commitImages(cmdbuf);
|
||||
return cmdbuf;
|
||||
getCurrentCombuf();
|
||||
commitBuffers();
|
||||
commitImages();
|
||||
return g_staging.current;
|
||||
}
|
||||
|
||||
void R_VkStagingFrameBegin(void) {
|
||||
|
@ -265,19 +274,21 @@ void R_VkStagingFrameBegin(void) {
|
|||
g_staging.images.count = 0;
|
||||
}
|
||||
|
||||
VkCommandBuffer R_VkStagingFrameEnd(void) {
|
||||
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
|
||||
if (cmdbuf)
|
||||
XVK_CHECK(vkEndCommandBuffer(cmdbuf));
|
||||
vk_combuf_t *R_VkStagingFrameEnd(void) {
|
||||
R_VkStagingCommit();
|
||||
vk_combuf_t *current = g_staging.current;
|
||||
|
||||
g_staging.cmdbuf = VK_NULL_HANDLE;
|
||||
if (current) {
|
||||
R_VkCombufEnd(g_staging.current);
|
||||
}
|
||||
|
||||
const VkCommandBuffer tmp = g_staging.upload_pool.buffers[0];
|
||||
g_staging.upload_pool.buffers[0] = g_staging.upload_pool.buffers[1];
|
||||
g_staging.upload_pool.buffers[1] = g_staging.upload_pool.buffers[2];
|
||||
g_staging.upload_pool.buffers[2] = tmp;
|
||||
g_staging.current = NULL;
|
||||
vk_combuf_t *const tmp = g_staging.combuf[0];
|
||||
g_staging.combuf[0] = g_staging.combuf[1];
|
||||
g_staging.combuf[1] = g_staging.combuf[2];
|
||||
g_staging.combuf[2] = tmp;
|
||||
|
||||
g_staging.stats.total_size = g_staging.stats.images_size + g_staging.stats.buffers_size;
|
||||
|
||||
return cmdbuf;
|
||||
return current;
|
||||
}
|
||||
|
|
|
@ -35,14 +35,14 @@ vk_staging_region_t R_VkStagingLockForImage(vk_staging_image_args_t args);
|
|||
void R_VkStagingUnlock(staging_handle_t handle);
|
||||
|
||||
// Append copy commands to command buffer.
|
||||
VkCommandBuffer R_VkStagingCommit(void);
|
||||
struct vk_combuf_s* R_VkStagingCommit(void);
|
||||
|
||||
// Mark previous frame data as uploaded and safe to use.
|
||||
void R_VkStagingFrameBegin(void);
|
||||
|
||||
// Uploads staging contents and returns the command buffer ready to be submitted.
|
||||
// Can return NULL if there's nothing to upload.
|
||||
VkCommandBuffer R_VkStagingFrameEnd(void);
|
||||
struct vk_combuf_s *R_VkStagingFrameEnd(void);
|
||||
|
||||
// Gets the current command buffer.
|
||||
// WARNING: Can be invalidated by any of the Lock calls
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "vk_const.h"
|
||||
#include "vk_descriptor.h"
|
||||
#include "vk_mapents.h" // wadlist
|
||||
#include "vk_combuf.h"
|
||||
|
||||
#include "xash3d_mathlib.h"
|
||||
#include "crtlib.h"
|
||||
|
@ -672,7 +673,10 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers,
|
|||
}
|
||||
}
|
||||
|
||||
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
|
||||
// TODO Don't change layout here. Alternatively:
|
||||
// I. Attach layout metadata to the image, and request its change next time it is used.
|
||||
// II. Build-in layout transfer to staging commit and do it there on commit.
|
||||
const VkCommandBuffer cmdbuf = R_VkStagingCommit()->cmdbuf;
|
||||
|
||||
// 5.2 image:layout:DST -> image:layout:SAMPLED
|
||||
// 5.2.1 transitionToLayout(DST -> SHADER_READ_ONLY)
|
||||
|
@ -689,7 +693,7 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers,
|
|||
};
|
||||
vkCmdPipelineBarrier(cmdbuf,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // FIXME incorrect, we also use them in compute and potentially ray tracing shaders
|
||||
0, 0, NULL, 0, NULL, 1, &image_barrier);
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue