vk: profiler: cover staging with combuf scopes; add build_as time

This commit is contained in:
Ivan Avdeev 2023-04-07 11:14:41 -07:00 committed by Ivan Avdeev
parent f6201e460f
commit 92ce698292
13 changed files with 169 additions and 89 deletions

View File

@ -26,4 +26,46 @@
- **(ref) R_AllowFog(true)**
- **(ref) R_EndFrame()**
# Staging and multiple command buffers
We want to get rid of extra command buffers used for staging (and building blases). That would mean tying any command-buffer-related things in there to framectl.
However, there are several staging cmdbuf usages which are technically out-of-band with respect to framectl:
0. Staging data can get full, which requires sync flush: filling cmdbuf outside of frame (or while still building a frame), submitting it and waiting on it.
1. Texture uploading. There's an explicit usage of the staging cmdbuf in vk_texture to perform a layout transfer. This layout transfer could be moved into staging itself.
2. BLAS building. Creating a ray model uploads its geometry via staging and then immediately builds its BLAS on the same staging cmdbuf. Ideally(?), we'd like to split BLAS building to some later stage to do it in bulk.
# OpenGL-like immediate mode rendering, ~TriApi
## Functions:
R_Set2DMode(bool) -- switches between 3D scene and 2D overlay modes; used in engine
R_DrawStretchRaw,
R_DrawStretchPic,
R_DrawTileClear,
CL_FillRGBA,
CL_FillRGBABlend,
R_AllowFog,
GL_SetRenderMode,
void (*GL_Bind)( int tmu, unsigned int texnum );
void (*GL_SelectTexture)( int tmu );
void (*GL_LoadTextureMatrix)( const float *glmatrix ); -- exported to the game, not used in engine
void (*GL_TexMatrixIdentity)( void ); -- exported to the game, not used in engine
void (*GL_CleanUpTextureUnits)( int last ); // pass 0 for clear all the texture units
void (*GL_TexGen)( unsigned int coord, unsigned int mode );
void (*GL_TextureTarget)( unsigned int target ); // change texture unit mode without bind texture
void (*GL_TexCoordArrayMode)( unsigned int texmode );
void (*GL_UpdateTexSize)( int texnum, int width, int height, int depth ); // recalc statistics
TriRenderMode,
TriBegin,
TriEnd,
TriColor4f,
TriColor4ub,
TriTexCoord2f,
TriVertex3fv,
TriVertex3f,
TriFog,
TriGetMatrix,
TriFogParams,
TriCullFace,

View File

@ -345,7 +345,7 @@ static int drawGraph( r_speeds_graph_t *const graph, int frame_bar_y ) {
return frame_bar_y;
}
static int drawFrames( int draw, uint32_t prev_frame_index, int y, const vk_combuf_scopes_t *gpurofl) {
static int drawFrames( int draw, uint32_t prev_frame_index, int y, const vk_combuf_scopes_t *gpurofls, int gpurofls_count) {
// Draw latest 2 frames; find their boundaries
uint32_t rewind_frame = prev_frame_index;
const int max_frames_to_draw = 2;
@ -378,22 +378,25 @@ static int drawFrames( int draw, uint32_t prev_frame_index, int y, const vk_comb
y += g_speeds.font_metrics.glyph_height * 6;
const int bar_height = g_speeds.font_metrics.glyph_height;
for (int i = 0; i < gpurofl->entries_count; ++i) {
const int scope_index = gpurofl->entries[i];
const uint64_t begin_ns = gpurofl->timestamps[scope_index*2 + 0];
const uint64_t end_ns = gpurofl->timestamps[scope_index*2 + 1];
const char *name = gpurofl->scopes[scope_index].name;
for (int j = 0; j < gpurofls_count; ++j) {
const vk_combuf_scopes_t *const gpurofl = gpurofls + j;
for (int i = 0; i < gpurofl->entries_count; ++i) {
const int scope_index = gpurofl->entries[i];
const uint64_t begin_ns = gpurofl->timestamps[i*2 + 0];
const uint64_t end_ns = gpurofl->timestamps[i*2 + 1];
const char *name = gpurofl->scopes[scope_index].name;
if (!g_speeds.frame.gpu_scopes[scope_index].initialized) {
R_SpeedsRegisterMetric(&g_speeds.frame.gpu_scopes[scope_index].time_us, name, kSpeedsMetricMicroseconds);
g_speeds.frame.gpu_scopes[scope_index].initialized = 1;
if (!g_speeds.frame.gpu_scopes[scope_index].initialized) {
R_SpeedsRegisterMetric(&g_speeds.frame.gpu_scopes[scope_index].time_us, name, kSpeedsMetricMicroseconds);
g_speeds.frame.gpu_scopes[scope_index].initialized = 1;
}
g_speeds.frame.gpu_scopes[scope_index].time_us += (end_ns - begin_ns) / 1000;
rgba_t color = {255, 255, 0, 127};
getColorForString(name, color);
drawTimeBar(frame_begin_time, time_scale_ms, begin_ns, end_ns, y + i * bar_height, bar_height, name, color);
}
g_speeds.frame.gpu_scopes[scope_index].time_us += (end_ns - begin_ns) / 1000;
rgba_t color = {255, 255, 0, 127};
getColorForString(name, color);
drawTimeBar(frame_begin_time, time_scale_ms, begin_ns, end_ns, y + i * bar_height, bar_height, name, color);
}
}
return y;
@ -586,11 +589,14 @@ void R_SpeedsRegisterMetric(int* p_value, const char *name, r_speeds_metric_type
}
}
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl) {
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl, int gpurofl_count) {
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
const uint64_t gpu_frame_begin_ns = gpurofl->timestamps[0];
const uint64_t gpu_frame_end_ns = gpurofl->timestamps[1];
uint64_t gpu_frame_begin_ns = UINT64_MAX, gpu_frame_end_ns = 0;
for (int i = 0; i < gpurofl_count; ++i) {
gpu_frame_begin_ns = Q_min(gpu_frame_begin_ns, gpurofl[i].timestamps[0]);
gpu_frame_end_ns = Q_max(gpu_frame_end_ns, gpurofl[i].timestamps[1]);
}
// Reads current font/DPI scale, many functions below use it
getCurrentFontMetrics();
@ -614,7 +620,7 @@ void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scope
{
int y = 100;
const int draw = speeds_bits & SPEEDS_BIT_FRAME;
y = drawFrames( draw, prev_frame_index, y, gpurofl );
y = drawFrames( draw, prev_frame_index, y, gpurofl, gpurofl_count );
if (draw)
y = drawGraphs(y + 10);

View File

@ -5,7 +5,7 @@
void R_SpeedsInit( void );
struct vk_combuf_scopes_s;
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl);
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl, int gpurofl_count);
// Called from the engine into ref_api to get the latest speeds info
qboolean R_SpeedsMessage( char *out, size_t size );

View File

@ -3,7 +3,7 @@
#include "profiler.h"
#define MAX_COMMANDBUFFERS 4
#define MAX_COMMANDBUFFERS 6
#define MAX_QUERY_COUNT 128
#define BEGIN_INDEX_TAG 0x10000000

View File

@ -43,6 +43,8 @@ typedef struct {
// Unfortunately waiting on semaphore also means resetting it when it is signaled
// so we can't reuse the same one for two purposes and need to mnozhit sunchnosti
VkSemaphore sem_done2;
vk_combuf_t *staging_combuf;
} vk_framectl_frame_t;
static struct {
@ -208,8 +210,6 @@ void R_BeginFrame( qboolean clearScene ) {
waitForFrameFence();
// Current command buffer is done and available
// Previous might still be in flight
// TODO R_VkQueryPoolGetFrameResults(g_frame.qpools + g_frame.current.index);
}
APROF_SCOPE_END(begin_frame_tail);
@ -220,8 +220,12 @@ void R_BeginFrame( qboolean clearScene ) {
APROF_SCOPE_BEGIN(begin_frame);
{
const vk_combuf_scopes_t gpurofl = R_VkCombufScopesGet(frame->combuf);
R_SpeedsDisplayMore(prev_frame_event_index, &gpurofl);
const vk_combuf_scopes_t gpurofl[] = {
frame->staging_combuf ? R_VkCombufScopesGet(frame->staging_combuf) : (vk_combuf_scopes_t){},
R_VkCombufScopesGet(frame->combuf),
};
R_SpeedsDisplayMore(prev_frame_event_index, frame->staging_combuf ? gpurofl : gpurofl + 1, frame->staging_combuf ? 2 : 1);
}
if (vk_core.rtx && FBitSet( vk_rtx->flags, FCVAR_CHANGED )) {
@ -304,6 +308,7 @@ static void enqueueRendering( vk_combuf_t* combuf ) {
g_frame.current.phase = Phase_RenderingEnqueued;
}
// FIXME pass frame, not combuf (possible desync)
static void submit( vk_combuf_t* combuf, qboolean wait ) {
ASSERT(g_frame.current.phase == Phase_RenderingEnqueued);
@ -314,8 +319,10 @@ static void submit( vk_combuf_t* combuf, qboolean wait ) {
R_VkCombufEnd(combuf);
frame->staging_combuf = R_VkStagingFrameEnd();
const VkCommandBuffer cmdbufs[] = {
R_VkStagingFrameEnd(),
frame->staging_combuf ? frame->staging_combuf->cmdbuf : NULL,
cmdbuf,
};

View File

@ -4,6 +4,7 @@
#include "vk_rtx.h"
#include "vk_ray_internal.h"
#include "r_speeds.h"
#include "vk_combuf.h"
#define MAX_SCRATCH_BUFFER (32*1024*1024)
#define MAX_ACCELS_BUFFER (64*1024*1024)
@ -26,7 +27,7 @@ static VkDeviceAddress getASAddress(VkAccelerationStructureKHR as) {
}
// TODO split this into smaller building blocks in a separate module
qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_build_args_t *args, vk_ray_model_t *model) {
qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args, vk_ray_model_t *model) {
qboolean should_create = *args->p_accel == VK_NULL_HANDLE;
#if 1 // update does not work at all on AMD gpus
qboolean is_update = false; // FIXME this crashes for some reason !should_create && args->dynamic;
@ -105,7 +106,7 @@ qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_bu
}
// If not enough data for building, just create
if (!cmdbuf || !args->build_ranges)
if (!combuf || !args->build_ranges)
return true;
if (model) {
@ -121,11 +122,17 @@ qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_bu
//gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x\n", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size);
g_accel_.stats.accels_built++;
vkCmdBuildAccelerationStructuresKHR(cmdbuf, 1, &build_info, &args->build_ranges);
static int scope_id = -2;
if (scope_id == -2)
scope_id = R_VkGpuScope_Register("build_as");
const int begin_index = R_VkCombufScopeBegin(combuf, scope_id);
vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, &build_info, &args->build_ranges);
R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR);
return true;
}
static void createTlas( VkCommandBuffer cmdbuf, VkDeviceAddress instances_addr ) {
static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) {
const VkAccelerationStructureGeometryKHR tl_geom[] = {
{
.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR,
@ -146,7 +153,7 @@ static void createTlas( VkCommandBuffer cmdbuf, VkDeviceAddress instances_addr )
const as_build_args_t asrgs = {
.geoms = tl_geom,
.max_prim_counts = tl_max_prim_counts,
.build_ranges = cmdbuf == VK_NULL_HANDLE ? NULL : &tl_build_range,
.build_ranges = !combuf ? NULL : &tl_build_range,
.n_geoms = COUNTOF(tl_geom),
.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
// we can't really rebuild TLAS because instance count changes are not allowed .dynamic = true,
@ -154,15 +161,15 @@ static void createTlas( VkCommandBuffer cmdbuf, VkDeviceAddress instances_addr )
.p_accel = &g_accel.tlas,
.debug_name = "TLAS",
};
if (!createOrUpdateAccelerationStructure(cmdbuf, &asrgs, NULL)) {
if (!createOrUpdateAccelerationStructure(combuf, &asrgs, NULL)) {
gEngine.Host_Error("Could not create/update TLAS\n");
return;
}
}
void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf) {
void RT_VkAccelPrepareTlas(vk_combuf_t *combuf) {
ASSERT(g_ray_model_state.frame.num_models > 0);
DEBUG_BEGIN(cmdbuf, "prepare tlas");
DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas");
R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc );
@ -220,15 +227,15 @@ void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf) {
.offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR),
.size = g_ray_model_state.frame.num_models * sizeof(VkAccelerationStructureInstanceKHR),
} };
vkCmdPipelineBarrier(cmdbuf,
vkCmdPipelineBarrier(combuf->cmdbuf,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL);
}
// 2. Build TLAS
createTlas(cmdbuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
DEBUG_END(cmdbuf);
createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR));
DEBUG_END(combuf->cmdbuf);
}
qboolean RT_VkAccelInit(void) {

View File

@ -41,4 +41,5 @@ qboolean RT_VkAccelInit(void);
void RT_VkAccelShutdown(void);
void RT_VkAccelNewMap(void);
void RT_VkAccelFrameBegin(void);
void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf);
struct vk_combuf_s;
void RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf);

View File

@ -51,7 +51,8 @@ typedef struct {
qboolean dynamic;
} as_build_args_t;
qboolean createOrUpdateAccelerationStructure(VkCommandBuffer cmdbuf, const as_build_args_t *args, vk_ray_model_t *model);
struct vk_combuf_s;
qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args, vk_ray_model_t *model);
typedef struct {
// Geometry metadata. Lifetime is similar to geometry lifetime itself.

View File

@ -8,6 +8,7 @@
#include "vk_staging.h"
#include "vk_light.h"
#include "vk_math.h"
#include "vk_combuf.h"
#include "eiface.h"
#include "xash3d_mathlib.h"
@ -324,8 +325,8 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
R_VkStagingUnlock(kusok_staging.handle);
// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
// FIXME this is definitely not the right place. We should upload everything in bulk, and only then build blases in bulk too
vk_combuf_t *const combuf = R_VkStagingCommit();
{
const VkBufferMemoryBarrier bmb[] = { {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
@ -344,7 +345,7 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
.offset = staging_args.offset,
.size = staging_args.size,
} };
vkCmdPipelineBarrier(cmdbuf,
vkCmdPipelineBarrier(combuf->cmdbuf,
VK_PIPELINE_STAGE_TRANSFER_BIT,
//VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
@ -368,9 +369,9 @@ vk_ray_model_t* VK_RayModelCreate( vk_ray_model_init_t args ) {
qboolean result;
asrgs.p_accel = &ray_model->as;
DEBUG_BEGINF(cmdbuf, "build blas for %s", args.model->debug_name);
result = createOrUpdateAccelerationStructure(cmdbuf, &asrgs, ray_model);
DEBUG_END(cmdbuf);
DEBUG_BEGINF(combuf->cmdbuf, "build blas for %s", args.model->debug_name);
result = createOrUpdateAccelerationStructure(combuf, &asrgs, ray_model);
DEBUG_END(combuf->cmdbuf);
if (!result)
{

View File

@ -276,7 +276,7 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a
}
DEBUG_BEGIN(cmdbuf, "yay tracing");
RT_VkAccelPrepareTlas(cmdbuf);
RT_VkAccelPrepareTlas(combuf);
prepareUniformBuffer(args->render_args, args->frame_index, args->fov_angle_y);
// 4. Barrier for TLAS build

View File

@ -4,6 +4,7 @@
#include "vk_commandpool.h"
#include "profiler.h"
#include "r_speeds.h"
#include "vk_combuf.h"
#include <memory.h>
@ -34,8 +35,10 @@ static struct {
int count;
} images;
vk_command_pool_t upload_pool;
VkCommandBuffer cmdbuf;
vk_combuf_t *combuf[3];
// Currently opened command buffer, ready to accept new commands
vk_combuf_t *current;
struct {
int total_size;
@ -50,7 +53,9 @@ qboolean R_VkStagingInit(void) {
if (!VK_BufferCreate("staging", &g_staging.buffer, DEFAULT_STAGING_SIZE, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
return false;
g_staging.upload_pool = R_VkCommandPoolCreate( COMMAND_BUFFER_COUNT );
g_staging.combuf[0] = R_VkCombufOpen();
g_staging.combuf[1] = R_VkCombufOpen();
g_staging.combuf[2] = R_VkCombufOpen();
R_FlippingBuffer_Init(&g_staging.buffer_alloc, DEFAULT_STAGING_SIZE);
@ -66,7 +71,6 @@ qboolean R_VkStagingInit(void) {
void R_VkStagingShutdown(void) {
VK_BufferDestroy(&g_staging.buffer);
R_VkCommandPoolDestroy( &g_staging.upload_pool );
}
// FIXME There's a severe race condition here. Submitting things manually and prematurely (before framectl had a chance to synchronize with the previous frame)
@ -74,12 +78,12 @@ void R_VkStagingShutdown(void) {
void R_VkStagingFlushSync( void ) {
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
if (!cmdbuf)
vk_combuf_t *combuf = R_VkStagingCommit();
if (!combuf)
goto end;
XVK_CHECK(vkEndCommandBuffer(cmdbuf));
g_staging.cmdbuf = VK_NULL_HANDLE;
R_VkCombufEnd(combuf);
g_staging.current = NULL;
//gEngine.Con_Reportf(S_WARN "flushing staging buffer img count=%d\n", g_staging.images.count);
@ -87,12 +91,14 @@ void R_VkStagingFlushSync( void ) {
const VkSubmitInfo subinfo = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &cmdbuf,
.pCommandBuffers = &combuf->cmdbuf,
};
// TODO wait for previous command buffer completion. Why: we might end up writing into the same dst
XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, VK_NULL_HANDLE));
// TODO wait for fence, not this
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
}
@ -173,7 +179,11 @@ void R_VkStagingUnlock(staging_handle_t handle) {
// FIXME mark and check ready
}
static void commitBuffers(VkCommandBuffer cmdbuf) {
static void commitBuffers(void) {
const VkCommandBuffer cmdbuf = g_staging.current->cmdbuf;
// TODO combuf scopes
// TODO better coalescing:
// - upload once per buffer
// - join adjacent regions
@ -214,7 +224,8 @@ static void commitBuffers(VkCommandBuffer cmdbuf) {
g_staging.buffers.count = 0;
}
static void commitImages(VkCommandBuffer cmdbuf) {
static void commitImages(void) {
const VkCommandBuffer cmdbuf = g_staging.current->cmdbuf;
for (int i = 0; i < g_staging.images.count; i++) {
/* { */
/* const VkBufferImageCopy *const copy = g_staging.images.copy + i; */
@ -233,29 +244,27 @@ static void commitImages(VkCommandBuffer cmdbuf) {
g_staging.images.count = 0;
}
VkCommandBuffer R_VkStagingGetCommandBuffer(void) {
if (g_staging.cmdbuf)
return g_staging.cmdbuf;
static vk_combuf_t *getCurrentCombuf(void) {
if (!g_staging.current) {
g_staging.current = g_staging.combuf[0];
R_VkCombufBegin(g_staging.current);
}
g_staging.cmdbuf = g_staging.upload_pool.buffers[0];
const VkCommandBufferBeginInfo beginfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
XVK_CHECK(vkBeginCommandBuffer(g_staging.cmdbuf, &beginfo));
return g_staging.cmdbuf;
return g_staging.current;
}
VkCommandBuffer R_VkStagingCommit(void) {
if (!g_staging.images.count && !g_staging.buffers.count && !g_staging.cmdbuf)
VkCommandBuffer R_VkStagingGetCommandBuffer(void) {
return getCurrentCombuf()->cmdbuf;
}
vk_combuf_t *R_VkStagingCommit(void) {
if (!g_staging.images.count && !g_staging.buffers.count && !g_staging.current)
return VK_NULL_HANDLE;
const VkCommandBuffer cmdbuf = R_VkStagingGetCommandBuffer();
commitBuffers(cmdbuf);
commitImages(cmdbuf);
return cmdbuf;
getCurrentCombuf();
commitBuffers();
commitImages();
return g_staging.current;
}
void R_VkStagingFrameBegin(void) {
@ -265,19 +274,21 @@ void R_VkStagingFrameBegin(void) {
g_staging.images.count = 0;
}
VkCommandBuffer R_VkStagingFrameEnd(void) {
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
if (cmdbuf)
XVK_CHECK(vkEndCommandBuffer(cmdbuf));
vk_combuf_t *R_VkStagingFrameEnd(void) {
R_VkStagingCommit();
vk_combuf_t *current = g_staging.current;
g_staging.cmdbuf = VK_NULL_HANDLE;
if (current) {
R_VkCombufEnd(g_staging.current);
}
const VkCommandBuffer tmp = g_staging.upload_pool.buffers[0];
g_staging.upload_pool.buffers[0] = g_staging.upload_pool.buffers[1];
g_staging.upload_pool.buffers[1] = g_staging.upload_pool.buffers[2];
g_staging.upload_pool.buffers[2] = tmp;
g_staging.current = NULL;
vk_combuf_t *const tmp = g_staging.combuf[0];
g_staging.combuf[0] = g_staging.combuf[1];
g_staging.combuf[1] = g_staging.combuf[2];
g_staging.combuf[2] = tmp;
g_staging.stats.total_size = g_staging.stats.images_size + g_staging.stats.buffers_size;
return cmdbuf;
return current;
}

View File

@ -35,14 +35,14 @@ vk_staging_region_t R_VkStagingLockForImage(vk_staging_image_args_t args);
void R_VkStagingUnlock(staging_handle_t handle);
// Append copy commands to command buffer.
VkCommandBuffer R_VkStagingCommit(void);
struct vk_combuf_s* R_VkStagingCommit(void);
// Mark previous frame data as uploaded and safe to use.
void R_VkStagingFrameBegin(void);
// Uploads staging contents and returns the command buffer ready to be submitted.
// Can return NULL if there's nothing to upload.
VkCommandBuffer R_VkStagingFrameEnd(void);
struct vk_combuf_s *R_VkStagingFrameEnd(void);
// Gets the current command buffer.
// WARNING: Can be invalidated by any of the Lock calls

View File

@ -6,6 +6,7 @@
#include "vk_const.h"
#include "vk_descriptor.h"
#include "vk_mapents.h" // wadlist
#include "vk_combuf.h"
#include "xash3d_mathlib.h"
#include "crtlib.h"
@ -672,7 +673,10 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers,
}
}
const VkCommandBuffer cmdbuf = R_VkStagingCommit();
// TODO Don't change layout here. Alternatively:
// I. Attach layout metadata to the image, and request its change next time it is used.
// II. Build-in layout transfer to staging commit and do it there on commit.
const VkCommandBuffer cmdbuf = R_VkStagingCommit()->cmdbuf;
// 5.2 image:layout:DST -> image:layout:SAMPLED
// 5.2.1 transitionToLayout(DST -> SHADER_READ_ONLY)
@ -689,7 +693,7 @@ static qboolean uploadTexture(vk_texture_t *tex, rgbdata_t *const *const layers,
};
vkCmdPipelineBarrier(cmdbuf,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // FIXME incorrect, we also use them in compute and potentially ray tracing shaders
0, 0, NULL, 0, NULL, 1, &image_barrier);
}