vk: display gpu time taken for a previous frame
its value will be incorrect for non-nv cards which have non-1 timestampPeriod
This commit is contained in:
parent
be95b65b22
commit
93a539df74
|
@ -108,10 +108,13 @@ static void drawProfilerScopes(const aprof_event_t *events, uint64_t frame_begin
|
|||
}
|
||||
|
||||
// FIXME move this to r_speeds or something like that
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_index) {
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_time_ns) {
|
||||
APROF_SCOPE_DECLARE_BEGIN(__FUNCTION__, __FUNCTION__);
|
||||
|
||||
int line = 4;
|
||||
{
|
||||
const int dirty = g_lights.stats.dirty_cells;
|
||||
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
|
||||
gEngine.Con_NPrintf(line++, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
|
||||
}
|
||||
|
||||
const uint32_t events = g_aprof.events_last_frame - prev_frame_index;
|
||||
|
@ -119,13 +122,14 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index) {
|
|||
const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - frame_begin_time;
|
||||
const float frame_time = delta_ns / 1e6;
|
||||
|
||||
gEngine.Con_NPrintf(5, "aprof events this frame: %u, wraps: %d, frame time: %.03fms\n", events, g_aprof.current_frame_wraparounds, frame_time);
|
||||
gEngine.Con_NPrintf(line++, "GPU frame time: %.03fms\n", gpu_time_ns * 1e-6);
|
||||
gEngine.Con_NPrintf(line++, "aprof events this frame: %u, wraps: %d, frame time: %.03fms\n", events, g_aprof.current_frame_wraparounds, frame_time);
|
||||
|
||||
g_slows.frame_times[g_slows.frame_num] = frame_time;
|
||||
g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY;
|
||||
|
||||
const float width = (float)vk_frame.width / MAX_FRAMES_HISTORY;
|
||||
const int frame_bar_y = 100;
|
||||
const int frame_bar_y = 100; // TODO font_height * scale * (line + 1)
|
||||
const float frame_bar_y_scale = 2.f; // ms to pixels
|
||||
|
||||
// 60fps
|
||||
|
@ -173,16 +177,7 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index) {
|
|||
drawProfilerScopes(events, frame_begin_time, time_scale_ms, event_begin, event_end, y);
|
||||
}
|
||||
|
||||
/* gEngine.Con_NPrintf(5, "Perf scopes:"); */
|
||||
/* for (int i = 0; i < g_aprof.num_scopes; ++i) { */
|
||||
/* const aprof_scope_t *const scope = g_aprof.scopes + i; */
|
||||
/* gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name, */
|
||||
/* scope->frame.count, */
|
||||
/* scope->frame.duration / 1e6, */
|
||||
/* (scope->frame.duration / 1e6) / scope->frame.count, */
|
||||
/* (scope->frame.duration - scope->frame.duration_children) / 1e6, */
|
||||
/* (scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count); */
|
||||
/* } */
|
||||
APROF_SCOPE_END(__FUNCTION__);
|
||||
}
|
||||
|
||||
static void togglePause( void ) {
|
||||
|
|
|
@ -3,4 +3,4 @@
|
|||
|
||||
void R_SlowsInit( void );
|
||||
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_event_index);
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_event_index, uint64_t gpu_time_ns);
|
||||
|
|
|
@ -246,6 +246,11 @@ do { \
|
|||
X(vkGetImageSubresourceLayout) \
|
||||
X(vkCmdSetCheckpointNV) \
|
||||
X(vkGetQueueCheckpointDataNV) \
|
||||
X(vkCreateQueryPool) \
|
||||
X(vkDestroyQueryPool) \
|
||||
X(vkCmdResetQueryPool) \
|
||||
X(vkCmdWriteTimestamp) \
|
||||
X(vkGetQueryPoolResults) \
|
||||
|
||||
#define DEVICE_FUNCS_RTX(X) \
|
||||
X(vkGetAccelerationStructureBuildSizesKHR) \
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "vk_image.h"
|
||||
#include "vk_staging.h"
|
||||
#include "vk_commandpool.h"
|
||||
#include "vk_querypool.h"
|
||||
|
||||
#include "profiler.h"
|
||||
#include "r_slows.h"
|
||||
|
@ -22,6 +23,7 @@ extern ref_globals_t *gpGlobals;
|
|||
|
||||
vk_framectl_t vk_frame = {0};
|
||||
|
||||
// Phase tracking is needed for getting screenshots. Basically, getting a screenshot does the same things as R_EndFrame, and they need to be congruent.
|
||||
typedef enum {
|
||||
Phase_Idle,
|
||||
Phase_FrameBegan,
|
||||
|
@ -36,6 +38,9 @@ static struct {
|
|||
VkSemaphore sem_done2[MAX_CONCURRENT_FRAMES];
|
||||
VkFence fence_done[MAX_CONCURRENT_FRAMES];
|
||||
|
||||
// TODO these should be tightly coupled with commandbuffers
|
||||
vk_query_pool_t qpools[MAX_CONCURRENT_FRAMES];
|
||||
|
||||
qboolean rtx_enabled;
|
||||
|
||||
struct {
|
||||
|
@ -187,19 +192,33 @@ static void updateGamma( void ) {
|
|||
}
|
||||
|
||||
void R_BeginFrame( qboolean clearScene ) {
|
||||
APROF_SCOPE_DECLARE_BEGIN(begin_frame_tail, "R_BeginFrame_tail");
|
||||
ASSERT(g_frame.current.phase == Phase_Submitted || g_frame.current.phase == Phase_Idle);
|
||||
const int prev_frame_index = g_frame.current.index % MAX_CONCURRENT_FRAMES;
|
||||
g_frame.current.index = (g_frame.current.index + 1) % MAX_CONCURRENT_FRAMES;
|
||||
const VkCommandBuffer cmdbuf = vk_frame.cmdbuf = g_frame.command.buffers[g_frame.current.index];
|
||||
vk_query_pool_t *const qpool = g_frame.qpools + g_frame.current.index;
|
||||
|
||||
{
|
||||
const uint32_t prev_frame_event_index = aprof_scope_frame();
|
||||
R_ShowExtendedProfilingData(prev_frame_event_index);
|
||||
waitForFrameFence();
|
||||
// Current command buffer is done and available
|
||||
// Previous might still be in flight
|
||||
|
||||
R_VkQueryPoolGetFrameResults(g_frame.qpools + g_frame.current.index);
|
||||
}
|
||||
|
||||
APROF_SCOPE_END(begin_frame_tail);
|
||||
|
||||
const uint32_t prev_frame_event_index = aprof_scope_frame();
|
||||
|
||||
APROF_SCOPE_BEGIN(frame);
|
||||
APROF_SCOPE_BEGIN(begin_frame);
|
||||
|
||||
{
|
||||
// FIXME correct only for nvidia where timestamps are ns
|
||||
const uint64_t gpu_time_ns_fixme = (qpool->used) ? qpool->results[1] - qpool->results[0] : 0;
|
||||
R_ShowExtendedProfilingData(prev_frame_event_index, gpu_time_ns_fixme);
|
||||
}
|
||||
|
||||
if (vk_core.rtx && FBitSet( vk_rtx->flags, FCVAR_CHANGED )) {
|
||||
g_frame.rtx_enabled = CVAR_TO_BOOL( vk_rtx );
|
||||
}
|
||||
|
@ -209,7 +228,6 @@ void R_BeginFrame( qboolean clearScene ) {
|
|||
|
||||
ASSERT(!g_frame.current.framebuffer.framebuffer);
|
||||
|
||||
waitForFrameFence();
|
||||
R_VkStagingFrameBegin();
|
||||
|
||||
g_frame.current.framebuffer = R_VkSwapchainAcquire( g_frame.sem_framebuffer_ready[g_frame.current.index] );
|
||||
|
@ -226,6 +244,8 @@ void R_BeginFrame( qboolean clearScene ) {
|
|||
XVK_CHECK(vkBeginCommandBuffer(cmdbuf, &beginfo));
|
||||
}
|
||||
|
||||
R_VkQueryPoolBegin(g_frame.qpools + g_frame.current.index, cmdbuf);
|
||||
|
||||
g_frame.current.phase = Phase_FrameBegan;
|
||||
APROF_SCOPE_END(begin_frame);
|
||||
}
|
||||
|
@ -289,6 +309,7 @@ static void enqueueRendering( VkCommandBuffer cmdbuf ) {
|
|||
static void submit( VkCommandBuffer cmdbuf, qboolean wait ) {
|
||||
ASSERT(g_frame.current.phase == Phase_RenderingEnqueued);
|
||||
|
||||
R_VkQueryPoolEnd(g_frame.qpools + g_frame.current.index, cmdbuf);
|
||||
XVK_CHECK(vkEndCommandBuffer(cmdbuf));
|
||||
|
||||
const VkCommandBuffer cmdbufs[] = {
|
||||
|
@ -394,6 +415,8 @@ qboolean VK_FrameCtlInit( void )
|
|||
SET_DEBUG_NAMEF(g_frame.sem_done2[i], VK_OBJECT_TYPE_SEMAPHORE, "done2[%d]", i);
|
||||
g_frame.fence_done[i] = R_VkFenceCreate(true);
|
||||
SET_DEBUG_NAMEF(g_frame.fence_done[i], VK_OBJECT_TYPE_FENCE, "done[%d]", i);
|
||||
|
||||
R_VkQueryPoolInit(g_frame.qpools + i);
|
||||
}
|
||||
|
||||
// Signal first frame semaphore as done
|
||||
|
@ -429,6 +452,7 @@ void VK_FrameCtlShutdown( void ) {
|
|||
R_VkSemaphoreDestroy(g_frame.sem_done[i]);
|
||||
R_VkSemaphoreDestroy(g_frame.sem_done2[i]);
|
||||
R_VkFenceDestroy(g_frame.fence_done[i]);
|
||||
R_VkQueryPoolDestroy(g_frame.qpools + i);
|
||||
}
|
||||
|
||||
R_VkSwapchainShutdown();
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
#include "vk_querypool.h"
|
||||
|
||||
qboolean R_VkQueryPoolInit( vk_query_pool_t* pool ) {
|
||||
const VkQueryPoolCreateInfo qpci = {
|
||||
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
||||
.pNext = NULL,
|
||||
.queryType = VK_QUERY_TYPE_TIMESTAMP,
|
||||
.queryCount = MAX_QUERY_COUNT,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
XVK_CHECK(vkCreateQueryPool(vk_core.device, &qpci, NULL, &pool->pool));
|
||||
return true;
|
||||
}
|
||||
|
||||
void R_VkQueryPoolDestroy( vk_query_pool_t *pool ) {
|
||||
vkDestroyQueryPool(vk_core.device, pool->pool, NULL);
|
||||
}
|
||||
|
||||
int R_VkQueryPoolTimestamp( vk_query_pool_t *pool, VkCommandBuffer cmdbuf, VkPipelineStageFlagBits stage) {
|
||||
if (pool->used >= MAX_QUERY_COUNT)
|
||||
return -1;
|
||||
|
||||
vkCmdWriteTimestamp(cmdbuf, stage, pool->pool, pool->used);
|
||||
return pool->used++;
|
||||
}
|
||||
|
||||
void R_VkQueryPoolBegin( vk_query_pool_t *pool, VkCommandBuffer cmdbuf ) {
|
||||
pool->used = 0;
|
||||
vkCmdResetQueryPool(cmdbuf, pool->pool, 0, MAX_QUERY_COUNT);
|
||||
R_VkQueryPoolTimestamp(pool, cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||
}
|
||||
|
||||
void R_VkQueryPoolEnd( vk_query_pool_t *pool, VkCommandBuffer cmdbuf ) {
|
||||
R_VkQueryPoolTimestamp(pool, cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
|
||||
}
|
||||
|
||||
void R_VkQueryPoolGetFrameResults( vk_query_pool_t *pool ) {
|
||||
if (!pool->used)
|
||||
return;
|
||||
|
||||
vkGetQueryPoolResults(vk_core.device, pool->pool, 0, pool->used, pool->used * sizeof(uint64_t), pool->results, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
#pragma once
|
||||
|
||||
#include "vk_core.h"
|
||||
|
||||
#define MAX_QUERY_COUNT 128
|
||||
|
||||
typedef struct {
|
||||
VkQueryPool pool;
|
||||
int used;
|
||||
uint64_t results[MAX_QUERY_COUNT];
|
||||
} vk_query_pool_t;
|
||||
|
||||
qboolean R_VkQueryPoolInit( vk_query_pool_t *pool );
|
||||
void R_VkQueryPoolDestroy( vk_query_pool_t *pool );
|
||||
int R_VkQueryPoolTimestamp( vk_query_pool_t *pool, VkCommandBuffer cmdbuf, VkPipelineStageFlagBits stage);
|
||||
void R_VkQueryPoolBegin( vk_query_pool_t *pool, VkCommandBuffer cmdbuf );
|
||||
void R_VkQueryPoolEnd( vk_query_pool_t *pool, VkCommandBuffer cmdbuf );
|
||||
|
||||
void R_VkQueryPoolGetFrameResults( vk_query_pool_t *pool );
|
Loading…
Reference in New Issue