mirror of
https://github.com/w23/xash3d-fwgs
synced 2024-12-14 21:20:26 +01:00
vk: profiler: extract gpu timestamps in a generic manner
This commit is contained in:
parent
73a6cf596a
commit
1bf6f6ee74
@ -2,6 +2,7 @@
|
|||||||
#include "vk_overlay.h"
|
#include "vk_overlay.h"
|
||||||
#include "vk_framectl.h"
|
#include "vk_framectl.h"
|
||||||
#include "vk_cvar.h"
|
#include "vk_cvar.h"
|
||||||
|
#include "vk_combuf.h"
|
||||||
|
|
||||||
#include "profiler.h"
|
#include "profiler.h"
|
||||||
|
|
||||||
@ -565,9 +566,12 @@ void R_SpeedsRegisterMetric(int* p_value, const char *name, r_speeds_metric_type
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_begin_ns, uint64_t gpu_frame_end_ns) {
|
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl) {
|
||||||
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
|
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
|
||||||
|
|
||||||
|
const uint64_t gpu_frame_begin_ns = gpurofl->timestamps[0];
|
||||||
|
const uint64_t gpu_frame_end_ns = gpurofl->timestamps[1];
|
||||||
|
|
||||||
// Reads current font/DPI scale, many functions below use it
|
// Reads current font/DPI scale, many functions below use it
|
||||||
getCurrentFontMetrics();
|
getCurrentFontMetrics();
|
||||||
|
|
||||||
|
@ -4,7 +4,8 @@
|
|||||||
|
|
||||||
void R_SpeedsInit( void );
|
void R_SpeedsInit( void );
|
||||||
|
|
||||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_begin_ns, uint64_t gpu_frame_end_ns);
|
struct vk_combuf_scopes_s;
|
||||||
|
void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl);
|
||||||
|
|
||||||
// Called from the engine into ref_api to get the latest speeds info
|
// Called from the engine into ref_api to get the latest speeds info
|
||||||
qboolean R_SpeedsMessage( char *out, size_t size );
|
qboolean R_SpeedsMessage( char *out, size_t size );
|
||||||
|
@ -1,15 +1,21 @@
|
|||||||
#include "vk_combuf.h"
|
#include "vk_combuf.h"
|
||||||
#include "vk_commandpool.h"
|
#include "vk_commandpool.h"
|
||||||
|
|
||||||
|
#include "profiler.h"
|
||||||
|
|
||||||
#define MAX_COMMANDBUFFERS 4
|
#define MAX_COMMANDBUFFERS 4
|
||||||
#define MAX_QUERY_COUNT 128
|
#define MAX_QUERY_COUNT 128
|
||||||
|
#define MAX_SCOPES 64
|
||||||
|
|
||||||
|
#define BEGIN_INDEX_TAG 0x10000000
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
vk_combuf_t public;
|
vk_combuf_t public;
|
||||||
int used;
|
int used;
|
||||||
struct {
|
struct {
|
||||||
// First two is entire command buffer time [begin, end]
|
int timestamps_offset;
|
||||||
uint32_t timestamps_offset;
|
int scopes[MAX_SCOPES];
|
||||||
|
int scopes_count;
|
||||||
} profiler;
|
} profiler;
|
||||||
} vk_combuf_impl_t;
|
} vk_combuf_impl_t;
|
||||||
|
|
||||||
@ -17,11 +23,15 @@ static struct {
|
|||||||
vk_command_pool_t pool;
|
vk_command_pool_t pool;
|
||||||
|
|
||||||
vk_combuf_impl_t combufs[MAX_COMMANDBUFFERS];
|
vk_combuf_impl_t combufs[MAX_COMMANDBUFFERS];
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
VkQueryPool pool;
|
VkQueryPool pool;
|
||||||
uint64_t values[MAX_QUERY_COUNT * MAX_COMMANDBUFFERS];
|
uint64_t values[MAX_QUERY_COUNT * MAX_COMMANDBUFFERS];
|
||||||
} timestamp;
|
} timestamp;
|
||||||
|
|
||||||
|
vk_combuf_scope_t scopes[MAX_SCOPES];
|
||||||
|
int scopes_count;
|
||||||
|
|
||||||
|
int entire_combuf_scope_id;
|
||||||
} g_combuf;
|
} g_combuf;
|
||||||
|
|
||||||
qboolean R_VkCombuf_Init( void ) {
|
qboolean R_VkCombuf_Init( void ) {
|
||||||
@ -46,14 +56,10 @@ qboolean R_VkCombuf_Init( void ) {
|
|||||||
SET_DEBUG_NAMEF(cb->public.cmdbuf, VK_OBJECT_TYPE_COMMAND_BUFFER, "cmdbuf[%d]", i);
|
SET_DEBUG_NAMEF(cb->public.cmdbuf, VK_OBJECT_TYPE_COMMAND_BUFFER, "cmdbuf[%d]", i);
|
||||||
|
|
||||||
cb->profiler.timestamps_offset = i * MAX_QUERY_COUNT;
|
cb->profiler.timestamps_offset = i * MAX_QUERY_COUNT;
|
||||||
|
|
||||||
/* for (int j = 0; j < COUNTOF(cb->public.sema_done); ++j) { */
|
|
||||||
/* cb->public.sema_done[j] = R_VkSemaphoreCreate(); */
|
|
||||||
/* ASSERT(cb->public.sema_done[j]); */
|
|
||||||
/* SET_DEBUG_NAMEF(cb->public.sema_done[j], VK_OBJECT_TYPE_SEMAPHORE, "done[%d][%d]", i, j); */
|
|
||||||
/* } */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
g_combuf.entire_combuf_scope_id = R_VkGpuScope_Register("EVERYTHING");
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -79,11 +85,14 @@ void R_VkCombufClose( vk_combuf_t* pub ) {
|
|||||||
cb->used = 0;
|
cb->used = 0;
|
||||||
|
|
||||||
// TODO synchronize?
|
// TODO synchronize?
|
||||||
|
// For now, external synchronization expected
|
||||||
}
|
}
|
||||||
|
|
||||||
void R_VkCombufBegin( vk_combuf_t* pub ) {
|
void R_VkCombufBegin( vk_combuf_t* pub ) {
|
||||||
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
|
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
|
||||||
|
|
||||||
|
cb->profiler.scopes_count = 0;
|
||||||
|
|
||||||
const VkCommandBufferBeginInfo beginfo = {
|
const VkCommandBufferBeginInfo beginfo = {
|
||||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||||
@ -91,25 +100,114 @@ void R_VkCombufBegin( vk_combuf_t* pub ) {
|
|||||||
XVK_CHECK(vkBeginCommandBuffer(cb->public.cmdbuf, &beginfo));
|
XVK_CHECK(vkBeginCommandBuffer(cb->public.cmdbuf, &beginfo));
|
||||||
|
|
||||||
vkCmdResetQueryPool(cb->public.cmdbuf, g_combuf.timestamp.pool, cb->profiler.timestamps_offset, MAX_QUERY_COUNT);
|
vkCmdResetQueryPool(cb->public.cmdbuf, g_combuf.timestamp.pool, cb->profiler.timestamps_offset, MAX_QUERY_COUNT);
|
||||||
vkCmdWriteTimestamp(cb->public.cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + 0);
|
R_VkCombufScopeBegin(pub, g_combuf.entire_combuf_scope_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void R_VkCombufEnd( vk_combuf_t* pub ) {
|
void R_VkCombufEnd( vk_combuf_t* pub ) {
|
||||||
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
|
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
|
||||||
vkCmdWriteTimestamp(cb->public.cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + 1);
|
R_VkCombufScopeEnd(pub, 0 | BEGIN_INDEX_TAG, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
|
||||||
XVK_CHECK(vkEndCommandBuffer(cb->public.cmdbuf));
|
XVK_CHECK(vkEndCommandBuffer(cb->public.cmdbuf));
|
||||||
}
|
}
|
||||||
|
|
||||||
int R_VkGpuScope_Register(const char *name) {
|
int R_VkGpuScope_Register(const char *name) {
|
||||||
// FIXME
|
if (g_combuf.scopes_count == MAX_SCOPES) {
|
||||||
|
gEngine.Con_Printf(S_ERROR "Cannot register GPU profiler scope \"%s\": max number of scope %d reached\n", name, MAX_SCOPES);
|
||||||
return -1;
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_combuf.scopes[g_combuf.scopes_count].name = name;
|
||||||
|
|
||||||
|
return g_combuf.scopes_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
int R_VkCombufScopeBegin(vk_combuf_t* combuf, int scope_id) {
|
int R_VkCombufScopeBegin(vk_combuf_t* cumbuf, int scope_id) {
|
||||||
// FIXME
|
if (scope_id < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
ASSERT(scope_id < g_combuf.scopes_count);
|
||||||
|
|
||||||
|
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)cumbuf;
|
||||||
|
if (cb->profiler.scopes_count == MAX_SCOPES)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
cb->profiler.scopes[cb->profiler.scopes_count] = scope_id;
|
||||||
|
|
||||||
|
vkCmdWriteTimestamp(cb->public.cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + cb->profiler.scopes_count * 2);
|
||||||
|
|
||||||
|
return (cb->profiler.scopes_count++) | BEGIN_INDEX_TAG;
|
||||||
}
|
}
|
||||||
|
|
||||||
void R_VkCombufScopeEnd(vk_combuf_t* combuf, int begin_index, VkPipelineStageFlagBits pipeline_stage) {
|
void R_VkCombufScopeEnd(vk_combuf_t* combuf, int begin_index, VkPipelineStageFlagBits pipeline_stage) {
|
||||||
// FIXME
|
if (begin_index < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ASSERT(begin_index & BEGIN_INDEX_TAG);
|
||||||
|
begin_index ^= BEGIN_INDEX_TAG;
|
||||||
|
|
||||||
|
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)combuf;
|
||||||
|
|
||||||
|
vkCmdWriteTimestamp(cb->public.cmdbuf, pipeline_stage, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + begin_index * 2 + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t getGpuTimestampOffsetNs( uint64_t latest_gpu_timestamp, uint64_t latest_cpu_timestamp_ns ) {
|
||||||
|
// FIXME this is an incorrect check, we need to carry per-device extensions availability somehow. vk_core-vs-device refactoring pending
|
||||||
|
if (!vkGetCalibratedTimestampsEXT) {
|
||||||
|
// Estimate based on supposed submission time, assuming that we submit, and it starts computing right after cmdbuffer closure
|
||||||
|
// which may not be true. But it's all we got
|
||||||
|
// TODO alternative approach: estimate based on end timestamp
|
||||||
|
const uint64_t gpu_begin_ns = (double) latest_gpu_timestamp * vk_core.physical_device.properties.limits.timestampPeriod;
|
||||||
|
return latest_cpu_timestamp_ns - gpu_begin_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
const VkCalibratedTimestampInfoEXT cti[2] = {
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
|
||||||
|
.pNext = NULL,
|
||||||
|
.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
|
||||||
|
.pNext = NULL,
|
||||||
|
#if defined(_WIN32)
|
||||||
|
.timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,
|
||||||
|
#else
|
||||||
|
.timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
|
||||||
|
#endif
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
uint64_t timestamps[2] = {0};
|
||||||
|
uint64_t max_deviation[2] = {0};
|
||||||
|
vkGetCalibratedTimestampsEXT(vk_core.device, 2, cti, timestamps, max_deviation);
|
||||||
|
|
||||||
|
const uint64_t cpu = aprof_time_platform_to_ns(timestamps[1]);
|
||||||
|
const uint64_t gpu = (double)timestamps[0] * vk_core.physical_device.properties.limits.timestampPeriod;
|
||||||
|
return cpu - gpu;
|
||||||
|
}
|
||||||
|
|
||||||
|
vk_combuf_scopes_t R_VkCombufScopesGet( vk_combuf_t *pub ) {
|
||||||
|
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
|
||||||
|
vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
|
||||||
|
|
||||||
|
uint64_t *const timestamps = g_combuf.timestamp.values + cb->profiler.timestamps_offset;
|
||||||
|
const int timestamps_count = cb->profiler.scopes_count * 2;
|
||||||
|
|
||||||
|
vkGetQueryPoolResults(vk_core.device, g_combuf.timestamp.pool, cb->profiler.timestamps_offset, timestamps_count, timestamps_count * sizeof(uint64_t), timestamps, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||||
|
|
||||||
|
const uint64_t timestamp_offset_ns = getGpuTimestampOffsetNs( timestamps[1], aprof_time_now_ns() );
|
||||||
|
const double timestamp_period = vk_core.physical_device.properties.limits.timestampPeriod;
|
||||||
|
|
||||||
|
for (int i = 0; i < timestamps_count; ++i) {
|
||||||
|
const uint64_t gpu_ns = timestamps[i] * timestamp_period;
|
||||||
|
timestamps[i] = timestamp_offset_ns + gpu_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
APROF_SCOPE_END(function);
|
||||||
|
|
||||||
|
return (vk_combuf_scopes_t){
|
||||||
|
.timestamps = g_combuf.timestamp.values + cb->profiler.timestamps_offset,
|
||||||
|
.scopes = g_combuf.scopes,
|
||||||
|
.entries = cb->profiler.scopes,
|
||||||
|
.entries_count = cb->profiler.scopes_count,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,6 @@
|
|||||||
|
|
||||||
typedef struct vk_combuf_s {
|
typedef struct vk_combuf_s {
|
||||||
VkCommandBuffer cmdbuf;
|
VkCommandBuffer cmdbuf;
|
||||||
// VkSemaphore sema_done[2];
|
|
||||||
// VkFence fence_done;
|
|
||||||
} vk_combuf_t;
|
} vk_combuf_t;
|
||||||
|
|
||||||
qboolean R_VkCombuf_Init( void );
|
qboolean R_VkCombuf_Init( void );
|
||||||
@ -23,4 +21,18 @@ int R_VkGpuScope_Register(const char *name);
|
|||||||
int R_VkCombufScopeBegin(vk_combuf_t*, int scope_id);
|
int R_VkCombufScopeBegin(vk_combuf_t*, int scope_id);
|
||||||
void R_VkCombufScopeEnd(vk_combuf_t*, int begin_index, VkPipelineStageFlagBits pipeline_stage);
|
void R_VkCombufScopeEnd(vk_combuf_t*, int begin_index, VkPipelineStageFlagBits pipeline_stage);
|
||||||
|
|
||||||
// TODO r_vkgpu_scopes_t *R_VkGpuScopesGet( VkCommandBuffer cmdbuf );
|
typedef struct {
|
||||||
|
const char *name;
|
||||||
|
} vk_combuf_scope_t;
|
||||||
|
|
||||||
|
typedef struct vk_combuf_scopes_s {
|
||||||
|
const uint64_t *timestamps;
|
||||||
|
const vk_combuf_scope_t *scopes;
|
||||||
|
const int *entries; // index into scopes; each entry consumes 2 values from timestamps array sequentially
|
||||||
|
int entries_count;
|
||||||
|
} vk_combuf_scopes_t;
|
||||||
|
|
||||||
|
// Reads all the scope timing data (timestamp queries) and returns a list of things happened this frame.
|
||||||
|
// Prerequisite: all relevant recorded command buffers should've been completed and waited on already.
|
||||||
|
// The returned pointer remains valid until any next R_VkGpu*() call.
|
||||||
|
vk_combuf_scopes_t R_VkCombufScopesGet( vk_combuf_t * );
|
||||||
|
@ -220,10 +220,8 @@ void R_BeginFrame( qboolean clearScene ) {
|
|||||||
APROF_SCOPE_BEGIN(begin_frame);
|
APROF_SCOPE_BEGIN(begin_frame);
|
||||||
|
|
||||||
{
|
{
|
||||||
// FIXME collect and show more gpu profiling data
|
const vk_combuf_scopes_t gpurofl = R_VkCombufScopesGet(frame->combuf);
|
||||||
const uint64_t gpu_time_begin_ns = 0;// FIXME (qpool->used) ? qpool->results[0] : 0;
|
R_SpeedsDisplayMore(prev_frame_event_index, &gpurofl);
|
||||||
const uint64_t gpu_time_end_ns = 0;// FIXME (qpool->used) ? qpool->results[1] : 0;
|
|
||||||
R_ShowExtendedProfilingData(prev_frame_event_index, gpu_time_begin_ns, gpu_time_end_ns);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vk_core.rtx && FBitSet( vk_rtx->flags, FCVAR_CHANGED )) {
|
if (vk_core.rtx && FBitSet( vk_rtx->flags, FCVAR_CHANGED )) {
|
||||||
|
@ -1,99 +0,0 @@
|
|||||||
#include "vk_gpurofl.h"
|
|
||||||
#include "vk_querypool.h"
|
|
||||||
|
|
||||||
#define MAX_SCOPES 64
|
|
||||||
#define MAX_COMMANDBUFFERS 8
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const char *name;
|
|
||||||
} r_vkgpu_scope_t;
|
|
||||||
|
|
||||||
#define EVENT_BEGIN 0x100
|
|
||||||
|
|
||||||
// B....E
|
|
||||||
// B....E
|
|
||||||
// -> B..B.E..E
|
|
||||||
// -> B.......E
|
|
||||||
// -> B.E
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
VkCommandBuffer cmdbuf;
|
|
||||||
vk_query_pool_t *qpool;
|
|
||||||
|
|
||||||
uint32_t events[MAX_QUERY_COUNT];
|
|
||||||
} r_vkgpu_cmdbuf_assoc_t;
|
|
||||||
|
|
||||||
static struct {
|
|
||||||
r_vkgpu_scope_t scopes[MAX_SCOPES];
|
|
||||||
int scopes_count;
|
|
||||||
|
|
||||||
// FIXME couple these more tightly
|
|
||||||
r_vkgpu_cmdbuf_assoc_t assocs[MAX_COMMANDBUFFERS];
|
|
||||||
|
|
||||||
r_vkgpu_scopes_t last_frame;
|
|
||||||
} g_purofl;
|
|
||||||
|
|
||||||
int R_VkGpuScopeRegister(const char *name) {
|
|
||||||
if (g_purofl.scopes_count == MAX_SCOPES) {
|
|
||||||
gEngine.Con_Printf(S_ERROR "Cannot register GPU profiler scope \"%s\": max number of scope %d reached\n", name, MAX_SCOPES);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
g_purofl.scopes[g_purofl.scopes_count].name = name;
|
|
||||||
|
|
||||||
return g_purofl.scopes_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
void R_VkGpuBegin(VkCommandBuffer cmdbuf, vk_query_pool_t *qpool) {
|
|
||||||
for (int i = 0; i < MAX_COMMANDBUFFERS; ++i) {
|
|
||||||
r_vkgpu_cmdbuf_assoc_t *const assoc = g_purofl.assocs + i;
|
|
||||||
if (!assoc->cmdbuf) {
|
|
||||||
assoc->cmdbuf = cmdbuf;
|
|
||||||
assoc->qpool = qpool;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (assoc->cmdbuf == cmdbuf) {
|
|
||||||
assoc->qpool = qpool;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT(!"FIXME Cannot associate cmdbuf with query pool, slots exceeded");
|
|
||||||
}
|
|
||||||
|
|
||||||
static vk_query_pool_t *getQueryPool(VkCommandBuffer cmdbuf) {
|
|
||||||
for (int i = 0; i < MAX_COMMANDBUFFERS; ++i) {
|
|
||||||
r_vkgpu_cmdbuf_assoc_t *const assoc = g_purofl.assocs + i;
|
|
||||||
if (!assoc->cmdbuf)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (assoc->cmdbuf == cmdbuf)
|
|
||||||
return assoc->qpool;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void writeTimestamp(VkCommandBuffer cmdbuf, int scope_id, VkPipelineStageFlagBits stage, int begin) {
|
|
||||||
if (scope_id < 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// 1. Find query pool for the cmdbuf
|
|
||||||
vk_query_pool_t *const qpool = getQueryPool(cmdbuf);
|
|
||||||
if (!qpool) // TODO complain?
|
|
||||||
return;
|
|
||||||
|
|
||||||
// 2. Write timestamp
|
|
||||||
const int timestamp_id = R_VkQueryPoolTimestamp(qpool, cmdbuf, stage);
|
|
||||||
|
|
||||||
// 3. Associate timestamp index with scope_begin
|
|
||||||
}
|
|
||||||
|
|
||||||
/* int R_VkGpuScopeBegin(VkCommandBuffer cmdbuf, int scope_id) { */
|
|
||||||
/* writeTimestamp(cmdbuf, scope_id, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 1); */
|
|
||||||
/* } */
|
|
||||||
/* */
|
|
||||||
/* void R_VkGpuScopeEnd(VkCommandBuffer cmdbuf, int begin_index, VkPipelineStageFlagBits pipeline_stage) { */
|
|
||||||
/* writeTimestamp(cmdbuf, scope_id, pipeline_stage, 0); */
|
|
||||||
/* } */
|
|
@ -1,30 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "vk_core.h"
|
|
||||||
|
|
||||||
// Return scope_id for the new scope. -1 if failed
|
|
||||||
// name is expected to be statically allocated
|
|
||||||
int R_VkGpuScopeRegister(const char *name);
|
|
||||||
|
|
||||||
typedef struct vk_query_pool_s vk_query_pool_t;
|
|
||||||
void R_VkGpuBegin(VkCommandBuffer cmdbuf, vk_query_pool_t *qpool);
|
|
||||||
|
|
||||||
// Returns begin_index to use in R_VkGpuScopeEnd
|
|
||||||
int R_VkGpuScopeBegin(VkCommandBuffer cmdbuf, int scope_id);
|
|
||||||
|
|
||||||
void R_VkGpuScopeEnd(VkCommandBuffer cmdbuf, int begin_index, VkPipelineStageFlagBits pipeline_stage);
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const char *name;
|
|
||||||
uint64_t begin_ns, end_ns;
|
|
||||||
} r_vkgpu_scope_entry_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
r_vkgpu_scope_entry_t *scopes;
|
|
||||||
int scopes_count;
|
|
||||||
} r_vkgpu_scopes_t;
|
|
||||||
|
|
||||||
// Reads all the scope timing data (timestamp queries) and returns a list of things happened this frame.
|
|
||||||
// Prerequisite: all relevant recorded command buffers should've been completed and waited on already.
|
|
||||||
// The returned pointer remains valid until any next R_VkGpu*() call.
|
|
||||||
r_vkgpu_scopes_t *R_VkGpuScopesGet( VkCommandBuffer cmdbuf );
|
|
@ -1,89 +0,0 @@
|
|||||||
#include "vk_querypool.h"
|
|
||||||
#include "profiler.h" // for aprof_time_now_ns()
|
|
||||||
|
|
||||||
qboolean R_VkQueryPoolInit( vk_query_pool_t* pool ) {
|
|
||||||
const VkQueryPoolCreateInfo qpci = {
|
|
||||||
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
|
||||||
.pNext = NULL,
|
|
||||||
.queryType = VK_QUERY_TYPE_TIMESTAMP,
|
|
||||||
.queryCount = MAX_QUERY_COUNT,
|
|
||||||
.flags = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
XVK_CHECK(vkCreateQueryPool(vk_core.device, &qpci, NULL, &pool->pool));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void R_VkQueryPoolDestroy( vk_query_pool_t *pool ) {
|
|
||||||
vkDestroyQueryPool(vk_core.device, pool->pool, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
int R_VkQueryPoolTimestamp( vk_query_pool_t *pool, VkCommandBuffer cmdbuf, VkPipelineStageFlagBits stage) {
|
|
||||||
if (pool->used >= MAX_QUERY_COUNT)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
vkCmdWriteTimestamp(cmdbuf, stage, pool->pool, pool->used);
|
|
||||||
return pool->used++;
|
|
||||||
}
|
|
||||||
|
|
||||||
void R_VkQueryPoolBegin( vk_query_pool_t *pool, VkCommandBuffer cmdbuf ) {
|
|
||||||
pool->used = 0;
|
|
||||||
vkCmdResetQueryPool(cmdbuf, pool->pool, 0, MAX_QUERY_COUNT);
|
|
||||||
R_VkQueryPoolTimestamp(pool, cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
void R_VkQueryPoolEnd( vk_query_pool_t *pool, VkCommandBuffer cmdbuf ) {
|
|
||||||
R_VkQueryPoolTimestamp(pool, cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
|
|
||||||
pool->end_timestamp_ns = aprof_time_now_ns();
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint64_t getGpuTimestampOffsetNs( const vk_query_pool_t *pool ) {
|
|
||||||
// FIXME this is an incorrect check, we need to carry per-device extensions availability somehow. vk_core-vs-device refactoring pending
|
|
||||||
if (!vkGetCalibratedTimestampsEXT) {
|
|
||||||
// Estimate based on supposed submission time, assuming that we submit, and it starts computing right after cmdbuffer closure
|
|
||||||
// which may not be true. But it's all we got
|
|
||||||
// TODO alternative approach: estimate based on end timestamp
|
|
||||||
const uint64_t gpu_begin_ns = (double)pool->results[0] * vk_core.physical_device.properties.limits.timestampPeriod;
|
|
||||||
return pool->end_timestamp_ns - gpu_begin_ns;
|
|
||||||
}
|
|
||||||
|
|
||||||
const VkCalibratedTimestampInfoEXT cti[2] = {
|
|
||||||
{
|
|
||||||
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
|
|
||||||
.pNext = NULL,
|
|
||||||
.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
|
|
||||||
.pNext = NULL,
|
|
||||||
#if defined(_WIN32)
|
|
||||||
.timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,
|
|
||||||
#else
|
|
||||||
.timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
|
|
||||||
#endif
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
uint64_t timestamps[2] = {0};
|
|
||||||
uint64_t max_deviation[2] = {0};
|
|
||||||
vkGetCalibratedTimestampsEXT(vk_core.device, 2, cti, timestamps, max_deviation);
|
|
||||||
|
|
||||||
const uint64_t cpu = aprof_time_platform_to_ns(timestamps[1]);
|
|
||||||
const uint64_t gpu = (double)timestamps[0] * vk_core.physical_device.properties.limits.timestampPeriod;
|
|
||||||
return cpu - gpu;
|
|
||||||
}
|
|
||||||
|
|
||||||
void R_VkQueryPoolGetFrameResults( vk_query_pool_t *pool ) {
|
|
||||||
if (!pool->used)
|
|
||||||
return;
|
|
||||||
|
|
||||||
vkGetQueryPoolResults(vk_core.device, pool->pool, 0, pool->used, pool->used * sizeof(uint64_t), pool->results, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
|
||||||
|
|
||||||
const uint64_t timestamp_offset_ns = getGpuTimestampOffsetNs( pool );
|
|
||||||
const double timestamp_period = vk_core.physical_device.properties.limits.timestampPeriod;
|
|
||||||
|
|
||||||
for (int i = 0; i < pool->used; ++i) {
|
|
||||||
const uint64_t gpu_ns = pool->results[i] * timestamp_period;
|
|
||||||
pool->results[i] = timestamp_offset_ns + gpu_ns;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,20 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "vk_core.h"
|
|
||||||
|
|
||||||
#define MAX_QUERY_COUNT 128
|
|
||||||
|
|
||||||
typedef struct vk_query_pool_s {
|
|
||||||
VkQueryPool pool;
|
|
||||||
int used;
|
|
||||||
uint64_t results[MAX_QUERY_COUNT];
|
|
||||||
uint64_t end_timestamp_ns;
|
|
||||||
} vk_query_pool_t;
|
|
||||||
|
|
||||||
qboolean R_VkQueryPoolInit( vk_query_pool_t *pool );
|
|
||||||
void R_VkQueryPoolDestroy( vk_query_pool_t *pool );
|
|
||||||
int R_VkQueryPoolTimestamp( vk_query_pool_t *pool, VkCommandBuffer cmdbuf, VkPipelineStageFlagBits stage);
|
|
||||||
void R_VkQueryPoolBegin( vk_query_pool_t *pool, VkCommandBuffer cmdbuf );
|
|
||||||
void R_VkQueryPoolEnd( vk_query_pool_t *pool, VkCommandBuffer cmdbuf );
|
|
||||||
|
|
||||||
void R_VkQueryPoolGetFrameResults( vk_query_pool_t *pool );
|
|
Loading…
Reference in New Issue
Block a user