add trivial on-screen profiler printout
This commit is contained in:
parent
c45ac21a71
commit
8756f648ca
|
@ -0,0 +1,136 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define APROF_SCOPE_DECLARE(scope) \
|
||||
static aprof_scope_id_t _aprof_scope_id_##scope = -1
|
||||
|
||||
// scope_name is expected to be static and alive for the entire duration of the program
|
||||
#define APROF_SCOPE_INIT(scope, scope_name) \
|
||||
_aprof_scope_id_##scope = aprof_scope_init(scope_name)
|
||||
|
||||
#define APROF_SCOPE_BEGIN(scope) \
|
||||
aprof_scope_event(_aprof_scope_id_##scope, 1)
|
||||
|
||||
#define APROF_TOKENPASTE(x, y) x ## y
|
||||
#define APROF_TOKENPASTE2(x, y) APROF_TOKENPASTE(x, y)
|
||||
|
||||
#define APROF_SCOPE_BEGIN_EARLY(scope) \
|
||||
const int APROF_TOKENPASTE2(_aprof_dummy, __LINE__) = (aprof_scope_event(_aprof_scope_id_##scope, 1), 0)
|
||||
|
||||
#define APROF_SCOPE_END(scope) \
|
||||
aprof_scope_event(_aprof_scope_id_##scope, 0)
|
||||
|
||||
|
||||
typedef int aprof_scope_id_t;
|
||||
|
||||
aprof_scope_id_t aprof_scope_init(const char *scope_name);
|
||||
void aprof_scope_event(aprof_scope_id_t, int begin);
|
||||
void aprof_scope_frame( void );
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
|
||||
struct {
|
||||
uint64_t duration;
|
||||
uint64_t duration_children;
|
||||
} frame;
|
||||
} aprof_scope_t;
|
||||
|
||||
#define APROF_MAX_SCOPES 256
|
||||
#define APROF_MAX_STACK_DEPTH 32
|
||||
|
||||
typedef struct {
|
||||
aprof_scope_id_t scope;
|
||||
uint64_t time_begin;
|
||||
} aprof_stack_frame_t;
|
||||
|
||||
typedef struct {
|
||||
aprof_scope_t scopes[APROF_MAX_SCOPES];
|
||||
int num_scopes;
|
||||
|
||||
aprof_stack_frame_t stack[APROF_MAX_STACK_DEPTH];
|
||||
int stack_depth;
|
||||
|
||||
// TODO event log for chrome://trace (or similar) export and analysis
|
||||
} aprof_state_t;
|
||||
|
||||
extern aprof_state_t g_aprof;
|
||||
|
||||
#if defined(APROF_IMPLEMENT)
|
||||
|
||||
#ifdef __linux__
|
||||
#include <time.h>
|
||||
static uint64_t _aprof_time_now( void ) {
|
||||
struct timespec tp;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tp);
|
||||
return tp.tv_nsec + tp.tv_sec * 1000000000ull;
|
||||
}
|
||||
#else
|
||||
#error aprof is not implemented for this os
|
||||
#endif
|
||||
|
||||
aprof_state_t g_aprof = {0};
|
||||
|
||||
aprof_scope_id_t aprof_scope_init(const char *scope_name) {
|
||||
if (g_aprof.num_scopes == APROF_MAX_SCOPES)
|
||||
return -1;
|
||||
|
||||
g_aprof.scopes[g_aprof.num_scopes].name = scope_name;
|
||||
return g_aprof.num_scopes++;
|
||||
}
|
||||
|
||||
void aprof_scope_event(aprof_scope_id_t scope_id, int begin) {
|
||||
const uint64_t now = _aprof_time_now();
|
||||
if (scope_id < 0 || scope_id >= g_aprof.num_scopes)
|
||||
return;
|
||||
|
||||
// TODO improve performance by just writing into an event array here
|
||||
// analysis should be done on-demand later
|
||||
if (begin) {
|
||||
const int s = g_aprof.stack_depth;
|
||||
if (g_aprof.stack_depth == APROF_MAX_STACK_DEPTH)
|
||||
return;
|
||||
|
||||
g_aprof.stack[s].scope = scope_id;
|
||||
g_aprof.stack[s].time_begin = now;
|
||||
g_aprof.stack_depth++;
|
||||
} else {
|
||||
aprof_scope_t *scope;
|
||||
const aprof_stack_frame_t *const frame = g_aprof.stack + g_aprof.stack_depth - 1;
|
||||
uint64_t frame_duration;
|
||||
|
||||
assert(g_aprof.stack_depth > 0);
|
||||
if (g_aprof.stack_depth == 0)
|
||||
return;
|
||||
|
||||
assert(frame->scope == scope_id);
|
||||
|
||||
scope = g_aprof.scopes + frame->scope;
|
||||
frame_duration = now - frame->time_begin;
|
||||
scope->frame.duration += frame_duration;
|
||||
|
||||
if (g_aprof.stack_depth > 1) {
|
||||
const aprof_stack_frame_t *const parent_frame = g_aprof.stack + g_aprof.stack_depth - 2;
|
||||
aprof_scope_t *const parent_scope = g_aprof.scopes + parent_frame->scope;
|
||||
|
||||
assert(parent_frame->scope >= 0);
|
||||
assert(parent_frame->scope < g_aprof.num_scopes);
|
||||
|
||||
parent_scope->frame.duration_children += frame_duration;
|
||||
}
|
||||
|
||||
g_aprof.stack_depth--;
|
||||
}
|
||||
}
|
||||
|
||||
void aprof_scope_frame( void ) {
|
||||
assert(g_aprof.stack_depth == 0);
|
||||
for (int i = 0; i < g_aprof.num_scopes; ++i) {
|
||||
aprof_scope_t *const scope = g_aprof.scopes + i;
|
||||
memset(&scope->frame, 0, sizeof(scope->frame));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -131,7 +131,7 @@ void CL_FillRGBABlend( float x, float y, float w, float h, int r, int g, int b,
|
|||
drawFill(x, y, w, h, r, g, b, a, kRenderTransColor);
|
||||
}
|
||||
|
||||
void vk2dBegin( void )
|
||||
static void XVK_2dClear( void )
|
||||
{
|
||||
g2d.num_pics = 0;
|
||||
g2d.current_batch = 0;
|
||||
|
@ -170,6 +170,8 @@ void vk2dEnd( VkCommandBuffer cmdbuf )
|
|||
|
||||
if (vk_core.debug)
|
||||
vkCmdEndDebugUtilsLabelEXT(cmdbuf);
|
||||
|
||||
XVK_2dClear();
|
||||
}
|
||||
|
||||
static qboolean createPipelines( void )
|
||||
|
|
|
@ -11,5 +11,4 @@ void CL_FillRGBABlend( float x, float y, float w, float h, int r, int g, int b,
|
|||
|
||||
qboolean initVk2d( void );
|
||||
void deinitVk2d( void );
|
||||
void vk2dBegin( void );
|
||||
void vk2dEnd( VkCommandBuffer cmdbuf );
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#define APROF_IMPLEMENT
|
||||
#include "profiler.h"
|
||||
|
||||
#define XVK_PARSE_VERSION(v) \
|
||||
VK_VERSION_MAJOR(v), \
|
||||
VK_VERSION_MINOR(v), \
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include "vk_render.h"
|
||||
#include "vk_rtx.h"
|
||||
|
||||
#include "profiler.h"
|
||||
|
||||
#include "eiface.h"
|
||||
|
||||
#include <string.h>
|
||||
|
@ -24,6 +26,14 @@ static struct
|
|||
int last_frame_index; // Index of previous fully drawn frame into images array
|
||||
} g_frame;
|
||||
|
||||
#define PROFILER_SCOPES(X) \
|
||||
X(end_frame , "R_EndFrame"); \
|
||||
X(frame_gpu_wait, "Wait for GPU"); \
|
||||
|
||||
#define SCOPE_DECLARE(scope, name) APROF_SCOPE_DECLARE(scope)
|
||||
PROFILER_SCOPES(SCOPE_DECLARE)
|
||||
#undef SCOPE_DECLARE
|
||||
|
||||
static VkFormat findSupportedImageFormat(const VkFormat *candidates, VkImageTiling tiling, VkFormatFeatureFlags features) {
|
||||
for (int i = 0; candidates[i] != VK_FORMAT_UNDEFINED; ++i) {
|
||||
VkFormatProperties props;
|
||||
|
@ -270,6 +280,18 @@ static qboolean createSwapchain( void )
|
|||
|
||||
void R_BeginFrame( qboolean clearScene )
|
||||
{
|
||||
{
|
||||
gEngine.Con_NPrintf(5, "Perf scopes:");
|
||||
for (int i = 0; i < g_aprof.num_scopes; ++i) {
|
||||
const aprof_scope_t *const scope = g_aprof.scopes + i;
|
||||
gEngine.Con_NPrintf(6 + i, "%s: total=%.03fms self=%.03fms", scope->name,
|
||||
scope->frame.duration / 1e6,
|
||||
(scope->frame.duration - scope->frame.duration_children) / 1e6);
|
||||
}
|
||||
|
||||
aprof_scope_frame();
|
||||
}
|
||||
|
||||
// TODO should we handle multiple R_BeginFrame w/o R_EndFrame gracefully?
|
||||
ASSERT(g_frame.swapchain_image_index == -1);
|
||||
|
||||
|
@ -328,7 +350,6 @@ void R_BeginFrame( qboolean clearScene )
|
|||
}
|
||||
}
|
||||
|
||||
vk2dBegin();
|
||||
VK_RenderBegin( g_frame.rtx_enabled );
|
||||
|
||||
{
|
||||
|
@ -347,6 +368,7 @@ void VK_RenderFrame( const struct ref_viewpass_s *rvp )
|
|||
|
||||
void R_EndFrame( void )
|
||||
{
|
||||
APROF_SCOPE_BEGIN_EARLY(end_frame);
|
||||
VkClearValue clear_value[] = {
|
||||
{.color = {{1., 0., 0., 0.}}},
|
||||
{.depthStencil = {1., 0.}} // TODO reverse-z
|
||||
|
@ -427,9 +449,11 @@ void R_EndFrame( void )
|
|||
}
|
||||
}
|
||||
|
||||
APROF_SCOPE_BEGIN(frame_gpu_wait);
|
||||
// TODO bad sync
|
||||
XVK_CHECK(vkWaitForFences(vk_core.device, 1, &g_frame.fence, VK_TRUE, INT64_MAX));
|
||||
XVK_CHECK(vkResetFences(vk_core.device, 1, &g_frame.fence));
|
||||
APROF_SCOPE_END(frame_gpu_wait);
|
||||
|
||||
if (vk_core.debug)
|
||||
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
|
||||
|
@ -442,6 +466,8 @@ void R_EndFrame( void )
|
|||
|
||||
g_frame.last_frame_index = g_frame.swapchain_image_index;
|
||||
g_frame.swapchain_image_index = -1;
|
||||
|
||||
APROF_SCOPE_END(end_frame);
|
||||
}
|
||||
|
||||
static void toggleRaytracing( void ) {
|
||||
|
@ -452,6 +478,7 @@ static void toggleRaytracing( void ) {
|
|||
|
||||
qboolean VK_FrameCtlInit( void )
|
||||
{
|
||||
PROFILER_SCOPES(APROF_SCOPE_INIT);
|
||||
vk_frame.render_pass.raster = createRenderPass(false);
|
||||
if (vk_core.rtx)
|
||||
vk_frame.render_pass.after_ray_tracing = createRenderPass(true);
|
||||
|
|
Loading…
Reference in New Issue