vk: profiler: extract gpu timestamps in a generic manner

Ivan Avdeev 2023-04-03 10:45:16 -07:00 committed by Ivan Avdeev
parent 73a6cf596a
commit 1bf6f6ee74
9 changed files with 139 additions and 264 deletions

View File

@@ -2,6 +2,7 @@
 #include "vk_overlay.h"
 #include "vk_framectl.h"
 #include "vk_cvar.h"
+#include "vk_combuf.h"
 #include "profiler.h"
@@ -565,9 +566,12 @@ void R_SpeedsRegisterMetric(int* p_value, const char *name, r_speeds_metric_type
 	}
 }

-void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_begin_ns, uint64_t gpu_frame_end_ns) {
+void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl) {
 	APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);

+	const uint64_t gpu_frame_begin_ns = gpurofl->timestamps[0];
+	const uint64_t gpu_frame_end_ns = gpurofl->timestamps[1];
+
 	// Reads current font/DPI scale, many functions below use it
 	getCurrentFontMetrics();

View File

@@ -4,7 +4,8 @@
 void R_SpeedsInit( void );
-void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_begin_ns, uint64_t gpu_frame_end_ns);
+struct vk_combuf_scopes_s;
+void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scopes_s *gpurofl);

 // Called from the engine into ref_api to get the latest speeds info
 qboolean R_SpeedsMessage( char *out, size_t size );

View File

@@ -1,15 +1,21 @@
 #include "vk_combuf.h"
 #include "vk_commandpool.h"
+#include "profiler.h"

 #define MAX_COMMANDBUFFERS 4
 #define MAX_QUERY_COUNT 128

+#define MAX_SCOPES 64
+#define BEGIN_INDEX_TAG 0x10000000
+
 typedef struct {
 	vk_combuf_t public;
 	int used;
 	struct {
-		// First two is entire command buffer time [begin, end]
-		uint32_t timestamps_offset;
+		int timestamps_offset;
+		int scopes[MAX_SCOPES];
+		int scopes_count;
 	} profiler;
 } vk_combuf_impl_t;
@@ -17,11 +23,15 @@ static struct {
 	vk_command_pool_t pool;
 	vk_combuf_impl_t combufs[MAX_COMMANDBUFFERS];

 	struct {
 		VkQueryPool pool;
 		uint64_t values[MAX_QUERY_COUNT * MAX_COMMANDBUFFERS];
 	} timestamp;
+
+	vk_combuf_scope_t scopes[MAX_SCOPES];
+	int scopes_count;
+	int entire_combuf_scope_id;
 } g_combuf;

 qboolean R_VkCombuf_Init( void ) {
@@ -46,14 +56,10 @@ qboolean R_VkCombuf_Init( void ) {
 		SET_DEBUG_NAMEF(cb->public.cmdbuf, VK_OBJECT_TYPE_COMMAND_BUFFER, "cmdbuf[%d]", i);
 		cb->profiler.timestamps_offset = i * MAX_QUERY_COUNT;
-
-		/* for (int j = 0; j < COUNTOF(cb->public.sema_done); ++j) { */
-		/* 	cb->public.sema_done[j] = R_VkSemaphoreCreate(); */
-		/* 	ASSERT(cb->public.sema_done[j]); */
-		/* 	SET_DEBUG_NAMEF(cb->public.sema_done[j], VK_OBJECT_TYPE_SEMAPHORE, "done[%d][%d]", i, j); */
-		/* } */
 	}

+	g_combuf.entire_combuf_scope_id = R_VkGpuScope_Register("EVERYTHING");
+
 	return true;
 }
@@ -79,11 +85,14 @@ void R_VkCombufClose( vk_combuf_t* pub ) {
 	cb->used = 0;

 	// TODO synchronize?
+	// For now, external synchronization expected
 }

 void R_VkCombufBegin( vk_combuf_t* pub ) {
 	vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;

+	cb->profiler.scopes_count = 0;
+
 	const VkCommandBufferBeginInfo beginfo = {
 		.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
 		.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
@@ -91,25 +100,114 @@ void R_VkCombufBegin( vk_combuf_t* pub ) {
 	XVK_CHECK(vkBeginCommandBuffer(cb->public.cmdbuf, &beginfo));

 	vkCmdResetQueryPool(cb->public.cmdbuf, g_combuf.timestamp.pool, cb->profiler.timestamps_offset, MAX_QUERY_COUNT);
-	vkCmdWriteTimestamp(cb->public.cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + 0);
+	R_VkCombufScopeBegin(pub, g_combuf.entire_combuf_scope_id);
 }

 void R_VkCombufEnd( vk_combuf_t* pub ) {
 	vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
-	vkCmdWriteTimestamp(cb->public.cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + 1);
+	R_VkCombufScopeEnd(pub, 0 | BEGIN_INDEX_TAG, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
 	XVK_CHECK(vkEndCommandBuffer(cb->public.cmdbuf));
 }

 int R_VkGpuScope_Register(const char *name) {
-	// FIXME
-	return -1;
+	if (g_combuf.scopes_count == MAX_SCOPES) {
+		gEngine.Con_Printf(S_ERROR "Cannot register GPU profiler scope \"%s\": max number of scope %d reached\n", name, MAX_SCOPES);
+		return -1;
+	}
+
+	g_combuf.scopes[g_combuf.scopes_count].name = name;
+	return g_combuf.scopes_count++;
 }

-int R_VkCombufScopeBegin(vk_combuf_t* combuf, int scope_id) {
-	// FIXME
-	return -1;
+int R_VkCombufScopeBegin(vk_combuf_t* cumbuf, int scope_id) {
+	if (scope_id < 0)
+		return -1;
+
+	ASSERT(scope_id < g_combuf.scopes_count);
+
+	vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)cumbuf;
+	if (cb->profiler.scopes_count == MAX_SCOPES)
+		return -1;
+
+	cb->profiler.scopes[cb->profiler.scopes_count] = scope_id;
+	vkCmdWriteTimestamp(cb->public.cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + cb->profiler.scopes_count * 2);
+
+	return (cb->profiler.scopes_count++) | BEGIN_INDEX_TAG;
 }

 void R_VkCombufScopeEnd(vk_combuf_t* combuf, int begin_index, VkPipelineStageFlagBits pipeline_stage) {
-	// FIXME
+	if (begin_index < 0)
+		return;
+
+	ASSERT(begin_index & BEGIN_INDEX_TAG);
+	begin_index ^= BEGIN_INDEX_TAG;
+
+	vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)combuf;
+	vkCmdWriteTimestamp(cb->public.cmdbuf, pipeline_stage, g_combuf.timestamp.pool, cb->profiler.timestamps_offset + begin_index * 2 + 1);
 }
+
+static uint64_t getGpuTimestampOffsetNs( uint64_t latest_gpu_timestamp, uint64_t latest_cpu_timestamp_ns ) {
+	// FIXME this is an incorrect check, we need to carry per-device extensions availability somehow. vk_core-vs-device refactoring pending
+	if (!vkGetCalibratedTimestampsEXT) {
+		// Estimate based on supposed submission time, assuming that we submit, and it starts computing right after cmdbuffer closure
+		// which may not be true. But it's all we got
+		// TODO alternative approach: estimate based on end timestamp
+		const uint64_t gpu_begin_ns = (double) latest_gpu_timestamp * vk_core.physical_device.properties.limits.timestampPeriod;
+		return latest_cpu_timestamp_ns - gpu_begin_ns;
+	}
+
+	const VkCalibratedTimestampInfoEXT cti[2] = {
+		{
+			.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
+			.pNext = NULL,
+			.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
+		},
+		{
+			.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
+			.pNext = NULL,
+#if defined(_WIN32)
+			.timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,
+#else
+			.timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+#endif
+		},
+	};
+
+	uint64_t timestamps[2] = {0};
+	uint64_t max_deviation[2] = {0};
+	vkGetCalibratedTimestampsEXT(vk_core.device, 2, cti, timestamps, max_deviation);
+
+	const uint64_t cpu = aprof_time_platform_to_ns(timestamps[1]);
+	const uint64_t gpu = (double)timestamps[0] * vk_core.physical_device.properties.limits.timestampPeriod;
+	return cpu - gpu;
+}
+
+vk_combuf_scopes_t R_VkCombufScopesGet( vk_combuf_t *pub ) {
+	APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
+
+	vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub;
+
+	uint64_t *const timestamps = g_combuf.timestamp.values + cb->profiler.timestamps_offset;
+	const int timestamps_count = cb->profiler.scopes_count * 2;
+	vkGetQueryPoolResults(vk_core.device, g_combuf.timestamp.pool, cb->profiler.timestamps_offset, timestamps_count, timestamps_count * sizeof(uint64_t), timestamps, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);

+	const uint64_t timestamp_offset_ns = getGpuTimestampOffsetNs( timestamps[1], aprof_time_now_ns() );
+	const double timestamp_period = vk_core.physical_device.properties.limits.timestampPeriod;
+
+	for (int i = 0; i < timestamps_count; ++i) {
+		const uint64_t gpu_ns = timestamps[i] * timestamp_period;
+		timestamps[i] = timestamp_offset_ns + gpu_ns;
+	}
+
+	APROF_SCOPE_END(function);
+
+	return (vk_combuf_scopes_t){
+		.timestamps = g_combuf.timestamp.values + cb->profiler.timestamps_offset,
+		.scopes = g_combuf.scopes,
+		.entries = cb->profiler.scopes,
+		.entries_count = cb->profiler.scopes_count,
+	};
+}
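
A note on the scheme above: each scope instance claims two consecutive query slots — begin at slot index * 2, end at slot index * 2 + 1 — and the handle returned by R_VkCombufScopeBegin() is that pair index with BEGIN_INDEX_TAG ORed in, so an untagged or negative handle passed to R_VkCombufScopeEnd() hits the early-out or the ASSERT. A minimal standalone sketch of just this arithmetic (hypothetical scope_begin/scope_end helpers, not part of the commit):

	// sketch.c — illustration of the BEGIN_INDEX_TAG slot-pair scheme
	#include <assert.h>
	#include <stdio.h>

	#define BEGIN_INDEX_TAG 0x10000000

	static int scopes_count = 0;

	static int scope_begin(void) {
		const int index = scopes_count++;
		printf("begin timestamp -> query slot %d\n", index * 2);
		return index | BEGIN_INDEX_TAG; // tagged handle, as in R_VkCombufScopeBegin()
	}

	static void scope_end(int begin_index) {
		if (begin_index < 0)
			return; // a failed begin propagates harmlessly, same early-out as above
		assert(begin_index & BEGIN_INDEX_TAG);
		begin_index ^= BEGIN_INDEX_TAG;
		printf("end timestamp   -> query slot %d\n", begin_index * 2 + 1);
	}

	int main(void) {
		const int outer = scope_begin(); // slots 0, 1
		const int inner = scope_begin(); // slots 2, 3; nesting is fine, pairs never interleave
		scope_end(inner);
		scope_end(outer);
		return 0;
	}

This is also why R_VkCombufEnd() can close the whole-command-buffer scope with the literal 0 | BEGIN_INDEX_TAG: the "EVERYTHING" scope is begun first in R_VkCombufBegin(), so its pair index is always 0.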

View File

@@ -4,8 +4,6 @@

 typedef struct vk_combuf_s {
 	VkCommandBuffer cmdbuf;
-	// VkSemaphore sema_done[2];
-	// VkFence fence_done;
 } vk_combuf_t;

 qboolean R_VkCombuf_Init( void );
@@ -23,4 +21,18 @@ int R_VkGpuScope_Register(const char *name);

 int R_VkCombufScopeBegin(vk_combuf_t*, int scope_id);
 void R_VkCombufScopeEnd(vk_combuf_t*, int begin_index, VkPipelineStageFlagBits pipeline_stage);

-// TODO r_vkgpu_scopes_t *R_VkGpuScopesGet( VkCommandBuffer cmdbuf );
+typedef struct {
+	const char *name;
+} vk_combuf_scope_t;
+
+typedef struct vk_combuf_scopes_s {
+	const uint64_t *timestamps;
+	const vk_combuf_scope_t *scopes;
+	const int *entries; // index into scopes; each entry consumes 2 values from timestamps array sequentially
+	int entries_count;
+} vk_combuf_scopes_t;
+
+// Reads all the scope timing data (timestamp queries) and returns a list of things happened this frame.
+// Prerequisite: all relevant recorded command buffers should've been completed and waited on already.
+// The returned pointer remains valid until any next R_VkGpu*() call.
+vk_combuf_scopes_t R_VkCombufScopesGet( vk_combuf_t * );
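
The comment on entries fixes the consumer contract: entries[i] indexes into scopes, and timestamps[i * 2] / timestamps[i * 2 + 1] are that entry's begin/end, already mapped onto the CPU timeline in nanoseconds. A hedged usage sketch, assuming a vk_combuf_t that has finished executing; the "draw scene" scope and the helpers around it are illustrative, not from this commit:

	// assumes vk_combuf.h and the engine globals (gEngine) are in scope
	static int g_scope_draw = -1;

	static void init(void) {
		g_scope_draw = R_VkGpuScope_Register("draw scene"); // static string, kept by pointer
	}

	static void record(vk_combuf_t *frame_combuf) {
		const int begin = R_VkCombufScopeBegin(frame_combuf, g_scope_draw);
		// ... vkCmd* calls for the scene go here ...
		R_VkCombufScopeEnd(frame_combuf, begin, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
	}

	static void report(vk_combuf_t *frame_combuf) {
		const vk_combuf_scopes_t s = R_VkCombufScopesGet(frame_combuf);
		for (int i = 0; i < s.entries_count; ++i) {
			const uint64_t begin_ns = s.timestamps[i * 2 + 0];
			const uint64_t end_ns = s.timestamps[i * 2 + 1];
			gEngine.Con_Printf("%s: %.3f ms\n",
				s.scopes[s.entries[i]].name, (end_ns - begin_ns) / 1e6);
		}
	}

The next file below does the reporting half of exactly this: it fetches the scopes at frame begin, when the previous frame's command buffer is known to be complete, and hands them to r_speeds.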

View File

@@ -220,10 +220,8 @@ void R_BeginFrame( qboolean clearScene ) {
 	APROF_SCOPE_BEGIN(begin_frame);

 	{
-		// FIXME collect and show more gpu profiling data
-		const uint64_t gpu_time_begin_ns = 0;// FIXME (qpool->used) ? qpool->results[0] : 0;
-		const uint64_t gpu_time_end_ns = 0;// FIXME (qpool->used) ? qpool->results[1] : 0;
-		R_ShowExtendedProfilingData(prev_frame_event_index, gpu_time_begin_ns, gpu_time_end_ns);
+		const vk_combuf_scopes_t gpurofl = R_VkCombufScopesGet(frame->combuf);
+		R_SpeedsDisplayMore(prev_frame_event_index, &gpurofl);
 	}

 	if (vk_core.rtx && FBitSet( vk_rtx->flags, FCVAR_CHANGED )) {

View File

@@ -1,99 +0,0 @@
-#include "vk_gpurofl.h"
-#include "vk_querypool.h"
-
-#define MAX_SCOPES 64
-#define MAX_COMMANDBUFFERS 8
-
-typedef struct {
-	const char *name;
-} r_vkgpu_scope_t;
-
-#define EVENT_BEGIN 0x100
-
-// B....E
-// B....E
-//  -> B..B.E..E
-//  -> B.......E
-//  -> B.E
-
-typedef struct {
-	VkCommandBuffer cmdbuf;
-	vk_query_pool_t *qpool;
-	uint32_t events[MAX_QUERY_COUNT];
-} r_vkgpu_cmdbuf_assoc_t;
-
-static struct {
-	r_vkgpu_scope_t scopes[MAX_SCOPES];
-	int scopes_count;
-
-	// FIXME couple these more tightly
-	r_vkgpu_cmdbuf_assoc_t assocs[MAX_COMMANDBUFFERS];
-
-	r_vkgpu_scopes_t last_frame;
-} g_purofl;
-
-int R_VkGpuScopeRegister(const char *name) {
-	if (g_purofl.scopes_count == MAX_SCOPES) {
-		gEngine.Con_Printf(S_ERROR "Cannot register GPU profiler scope \"%s\": max number of scope %d reached\n", name, MAX_SCOPES);
-		return -1;
-	}
-
-	g_purofl.scopes[g_purofl.scopes_count].name = name;
-	return g_purofl.scopes_count++;
-}
-
-void R_VkGpuBegin(VkCommandBuffer cmdbuf, vk_query_pool_t *qpool) {
-	for (int i = 0; i < MAX_COMMANDBUFFERS; ++i) {
-		r_vkgpu_cmdbuf_assoc_t *const assoc = g_purofl.assocs + i;
-		if (!assoc->cmdbuf) {
-			assoc->cmdbuf = cmdbuf;
-			assoc->qpool = qpool;
-			return;
-		}
-
-		if (assoc->cmdbuf == cmdbuf) {
-			assoc->qpool = qpool;
-			return;
-		}
-	}
-
-	ASSERT(!"FIXME Cannot associate cmdbuf with query pool, slots exceeded");
-}
-
-static vk_query_pool_t *getQueryPool(VkCommandBuffer cmdbuf) {
-	for (int i = 0; i < MAX_COMMANDBUFFERS; ++i) {
-		r_vkgpu_cmdbuf_assoc_t *const assoc = g_purofl.assocs + i;
-		if (!assoc->cmdbuf)
-			break;
-
-		if (assoc->cmdbuf == cmdbuf)
-			return assoc->qpool;
-	}
-
-	return NULL;
-}
-
-static void writeTimestamp(VkCommandBuffer cmdbuf, int scope_id, VkPipelineStageFlagBits stage, int begin) {
-	if (scope_id < 0)
-		return;
-
-	// 1. Find query pool for the cmdbuf
-	vk_query_pool_t *const qpool = getQueryPool(cmdbuf);
-	if (!qpool) // TODO complain?
-		return;
-
-	// 2. Write timestamp
-	const int timestamp_id = R_VkQueryPoolTimestamp(qpool, cmdbuf, stage);
-
-	// 3. Associate timestamp index with scope_begin
-}
-
-/* int R_VkGpuScopeBegin(VkCommandBuffer cmdbuf, int scope_id) { */
-/* 	writeTimestamp(cmdbuf, scope_id, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 1); */
-/* } */
-/* */
-/* void R_VkGpuScopeEnd(VkCommandBuffer cmdbuf, int begin_index, VkPipelineStageFlagBits pipeline_stage) { */
-/* 	writeTimestamp(cmdbuf, scope_id, pipeline_stage, 0); */
-/* } */

View File

@ -1,30 +0,0 @@
#pragma once
#include "vk_core.h"
// Return scope_id for the new scope. -1 if failed
// name is expected to be statically allocated
int R_VkGpuScopeRegister(const char *name);
typedef struct vk_query_pool_s vk_query_pool_t;
void R_VkGpuBegin(VkCommandBuffer cmdbuf, vk_query_pool_t *qpool);
// Returns begin_index to use in R_VkGpuScopeEnd
int R_VkGpuScopeBegin(VkCommandBuffer cmdbuf, int scope_id);
void R_VkGpuScopeEnd(VkCommandBuffer cmdbuf, int begin_index, VkPipelineStageFlagBits pipeline_stage);
typedef struct {
const char *name;
uint64_t begin_ns, end_ns;
} r_vkgpu_scope_entry_t;
typedef struct {
r_vkgpu_scope_entry_t *scopes;
int scopes_count;
} r_vkgpu_scopes_t;
// Reads all the scope timing data (timestamp queries) and returns a list of things happened this frame.
// Prerequisite: all relevant recorded command buffers should've been completed and waited on already.
// The returned pointer remains valid until any next R_VkGpu*() call.
r_vkgpu_scopes_t *R_VkGpuScopesGet( VkCommandBuffer cmdbuf );

View File

@ -1,89 +0,0 @@
#include "vk_querypool.h"
#include "profiler.h" // for aprof_time_now_ns()
qboolean R_VkQueryPoolInit( vk_query_pool_t* pool ) {
const VkQueryPoolCreateInfo qpci = {
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.pNext = NULL,
.queryType = VK_QUERY_TYPE_TIMESTAMP,
.queryCount = MAX_QUERY_COUNT,
.flags = 0,
};
XVK_CHECK(vkCreateQueryPool(vk_core.device, &qpci, NULL, &pool->pool));
return true;
}
void R_VkQueryPoolDestroy( vk_query_pool_t *pool ) {
vkDestroyQueryPool(vk_core.device, pool->pool, NULL);
}
int R_VkQueryPoolTimestamp( vk_query_pool_t *pool, VkCommandBuffer cmdbuf, VkPipelineStageFlagBits stage) {
if (pool->used >= MAX_QUERY_COUNT)
return -1;
vkCmdWriteTimestamp(cmdbuf, stage, pool->pool, pool->used);
return pool->used++;
}
void R_VkQueryPoolBegin( vk_query_pool_t *pool, VkCommandBuffer cmdbuf ) {
pool->used = 0;
vkCmdResetQueryPool(cmdbuf, pool->pool, 0, MAX_QUERY_COUNT);
R_VkQueryPoolTimestamp(pool, cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
}
void R_VkQueryPoolEnd( vk_query_pool_t *pool, VkCommandBuffer cmdbuf ) {
R_VkQueryPoolTimestamp(pool, cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
pool->end_timestamp_ns = aprof_time_now_ns();
}
static uint64_t getGpuTimestampOffsetNs( const vk_query_pool_t *pool ) {
// FIXME this is an incorrect check, we need to carry per-device extensions availability somehow. vk_core-vs-device refactoring pending
if (!vkGetCalibratedTimestampsEXT) {
// Estimate based on supposed submission time, assuming that we submit, and it starts computing right after cmdbuffer closure
// which may not be true. But it's all we got
// TODO alternative approach: estimate based on end timestamp
const uint64_t gpu_begin_ns = (double)pool->results[0] * vk_core.physical_device.properties.limits.timestampPeriod;
return pool->end_timestamp_ns - gpu_begin_ns;
}
const VkCalibratedTimestampInfoEXT cti[2] = {
{
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
.pNext = NULL,
.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
},
{
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
.pNext = NULL,
#if defined(_WIN32)
.timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,
#else
.timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#endif
},
};
uint64_t timestamps[2] = {0};
uint64_t max_deviation[2] = {0};
vkGetCalibratedTimestampsEXT(vk_core.device, 2, cti, timestamps, max_deviation);
const uint64_t cpu = aprof_time_platform_to_ns(timestamps[1]);
const uint64_t gpu = (double)timestamps[0] * vk_core.physical_device.properties.limits.timestampPeriod;
return cpu - gpu;
}
void R_VkQueryPoolGetFrameResults( vk_query_pool_t *pool ) {
if (!pool->used)
return;
vkGetQueryPoolResults(vk_core.device, pool->pool, 0, pool->used, pool->used * sizeof(uint64_t), pool->results, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
const uint64_t timestamp_offset_ns = getGpuTimestampOffsetNs( pool );
const double timestamp_period = vk_core.physical_device.properties.limits.timestampPeriod;
for (int i = 0; i < pool->used; ++i) {
const uint64_t gpu_ns = pool->results[i] * timestamp_period;
pool->results[i] = timestamp_offset_ns + gpu_ns;
}
}
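
The calibration helper deleted here carries over nearly verbatim into vk_combuf.c above, so the math deserves one worked pass: GPU timestamps are raw ticks that become nanoseconds via timestampPeriod, and VK_EXT_calibrated_timestamps samples the GPU clock and a CPU clock at effectively the same instant, making cpu_now_ns - gpu_now_ns a constant offset that maps any later query result onto the CPU timeline. A numeric sketch with made-up values:

	// Hypothetical numbers, purely to show the unit conversion and offset math.
	#include <stdint.h>
	#include <stdio.h>

	int main(void) {
		const double timestamp_period = 0.04;        // ns per GPU tick (device-specific)
		const uint64_t gpu_now_ticks = 250000000000; // from VK_TIME_DOMAIN_DEVICE_EXT
		const uint64_t cpu_now_ns = 7000000000000;   // from CLOCK_MONOTONIC, in ns

		// Same conversion as getGpuTimestampOffsetNs():
		const uint64_t gpu_now_ns = (uint64_t)((double)gpu_now_ticks * timestamp_period); // 10e9 ns
		const uint64_t offset_ns = cpu_now_ns - gpu_now_ns; // 6990e9 ns

		// Any query result can now be mapped onto the CPU timeline:
		const uint64_t query_ticks = 249999000000; // a timestamp written ~40us earlier
		const uint64_t query_cpu_ns = offset_ns + (uint64_t)((double)query_ticks * timestamp_period);
		printf("query on CPU timeline: %llu ns (%.1f us before 'now')\n",
			(unsigned long long)query_cpu_ns, (cpu_now_ns - query_cpu_ns) / 1e3);
		return 0;
	}

Without the extension, the fallback path substitutes end_timestamp_ns (CPU time at command buffer closure) for the calibrated CPU sample and the first query result for the GPU sample — hence the in-code caveat that the estimate is only as good as the assumption that execution starts immediately after closure.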

View File

@ -1,20 +0,0 @@
#pragma once
#include "vk_core.h"
#define MAX_QUERY_COUNT 128
typedef struct vk_query_pool_s {
VkQueryPool pool;
int used;
uint64_t results[MAX_QUERY_COUNT];
uint64_t end_timestamp_ns;
} vk_query_pool_t;
qboolean R_VkQueryPoolInit( vk_query_pool_t *pool );
void R_VkQueryPoolDestroy( vk_query_pool_t *pool );
int R_VkQueryPoolTimestamp( vk_query_pool_t *pool, VkCommandBuffer cmdbuf, VkPipelineStageFlagBits stage);
void R_VkQueryPoolBegin( vk_query_pool_t *pool, VkCommandBuffer cmdbuf );
void R_VkQueryPoolEnd( vk_query_pool_t *pool, VkCommandBuffer cmdbuf );
void R_VkQueryPoolGetFrameResults( vk_query_pool_t *pool );