vk: start refactoring profiler

Convert direct stack manipulation to simple and cheap event writing.
Draw rudimentary frame times graph.

Related to #412
This commit is contained in:
Ivan Avdeev 2023-03-09 11:36:31 -08:00
parent 01de5957d8
commit 6ff146e658
5 changed files with 150 additions and 77 deletions

29
ref/vk/NOTES.md Normal file
View File

@ -0,0 +1,29 @@
# Frame structure wrt calls from the engine
- (eng) SCR_UpdateScreen()
- (eng) V_PreRender()
- **(ref) R_BeginFrame()**
- (eng) V_RenderView()
- **(ref) GL_BackendStartFrame()** -- ref_gl only sets speeds string to empty here
- (eng) loop over ref_params_t views
- **(ref) GL_RenderFrame()**
- (eng) ??? SV_DrawDebugTriangles()
- **(ref) GL_BackendEndFrame()** -- ref_gl only produces speeds string here
- (eng) V_PostRender()
- **(ref) R_AllowFog(), R_Set2DMode(true)**
- **(ref) R_DrawTileClear()** x N
- (vgui) Paint() -> ???
- (eng) SCR_RSpeeds()
- **(ref) R_SpeedsMessage()**
- (eng) CL_DrawString() ...
- **(ref) GL_SetRenderMode()**
- **(ref) RefGetParm()** for texture resolution
- **(ref) Color4ub()**
- **(ref) R_DrawStretchPic()**
- (eng) SRC_DrawNetGraph()
- **(ref) many TriApi calls** -- 2D usage of triapi. we were not ready for this (maybe track R_Set2DMode()?)
- **(ref) R_ShowTextures()** kekw
- **(ref) VID_ScreenShot()**
- **(ref) R_AllowFog(true)**
- **(ref) R_EndFrame()**

View File

@ -26,19 +26,15 @@
typedef int aprof_scope_id_t;
// scope_name should be static const, and not on stack
aprof_scope_id_t aprof_scope_init(const char *scope_name);
void aprof_scope_event(aprof_scope_id_t, int begin);
void aprof_scope_frame( void );
// Returns event index for previous frame
uint32_t aprof_scope_frame( void );
uint64_t aprof_time_now_ns( void );
typedef struct {
const char *name;
struct {
uint64_t duration;
uint64_t duration_children;
int count;
} frame;
} aprof_scope_t;
#define APROF_MAX_SCOPES 256
@ -49,12 +45,43 @@ typedef struct {
uint64_t time_begin;
} aprof_stack_frame_t;
#define APROF_EVENT_TYPE_MASK 0x0full
#define APROF_EVENT_TYPE_SHIFT 0
#define APROF_EVENT_TYPE(event) (((event)&APROF_EVENT_TYPE_MASK) >> APROF_EVENT_TYPE_SHIFT)
#define APROF_EVENT_SCOPE_ID_MASK 0xff00ull
#define APROF_EVENT_SCOPE_ID_SHIFT 8
#define APROF_EVENT_SCOPE_ID(event) (((event)&APROF_EVENT_SCOPE_ID_MASK) >> APROF_EVENT_SCOPE_ID_SHIFT)
#define APROF_EVENT_TIMESTAMP_SHIFT 16
#define APROF_EVENT_TIMESTAMP(event) ((event) >> APROF_EVENT_TIMESTAMP_SHIFT)
#define APROF_EVENT_MAKE(type, scope_id, timestamp) \
(((type) << APROF_EVENT_TYPE_SHIFT) & APROF_EVENT_TYPE_MASK) | \
(((scope_id) << APROF_EVENT_SCOPE_ID_SHIFT) & APROF_EVENT_SCOPE_ID_MASK) | \
((timestamp) << APROF_EVENT_TIMESTAMP_SHIFT)
enum {
APROF_EVENT_FRAME_BOUNDARY = 0,
APROF_EVENT_SCOPE_BEGIN = 1,
APROF_EVENT_SCOPE_END = 2,
};
// MUST be power of 2
#define APROF_EVENT_BUFFER_SIZE (1<<20)
#define APROF_EVENT_BUFFER_SIZE_MASK (APROF_EVENT_BUFFER_SIZE - 1)
typedef uint64_t aprof_event_t;
typedef struct {
aprof_scope_t scopes[APROF_MAX_SCOPES];
int num_scopes;
aprof_stack_frame_t stack[APROF_MAX_STACK_DEPTH];
int stack_depth;
aprof_event_t events[APROF_EVENT_BUFFER_SIZE];
uint32_t events_write;
uint32_t events_last_frame;
int current_frame_wraparounds;
// TODO event log for chrome://trace (or similar) export and analysis
} aprof_state_t;
@ -104,52 +131,24 @@ void aprof_scope_event(aprof_scope_id_t scope_id, int begin) {
if (scope_id < 0 || scope_id >= g_aprof.num_scopes)
return;
// TODO improve performance by just writing into an event array here
// analysis should be done on-demand later
if (begin) {
const int s = g_aprof.stack_depth;
if (g_aprof.stack_depth == APROF_MAX_STACK_DEPTH)
return;
g_aprof.events[g_aprof.events_write] = APROF_EVENT_MAKE(begin?APROF_EVENT_SCOPE_BEGIN:APROF_EVENT_SCOPE_END, scope_id, now);
g_aprof.events_write = (g_aprof.events_write + 1) & APROF_EVENT_BUFFER_SIZE_MASK;
g_aprof.stack[s].scope = scope_id;
g_aprof.stack[s].time_begin = now;
g_aprof.stack_depth++;
} else {
aprof_scope_t *scope;
const aprof_stack_frame_t *const frame = g_aprof.stack + g_aprof.stack_depth - 1;
uint64_t frame_duration;
assert(g_aprof.stack_depth > 0);
if (g_aprof.stack_depth == 0)
return;
assert(frame->scope == scope_id);
scope = g_aprof.scopes + frame->scope;
frame_duration = now - frame->time_begin;
scope->frame.duration += frame_duration;
scope->frame.count++;
if (g_aprof.stack_depth > 1) {
const aprof_stack_frame_t *const parent_frame = g_aprof.stack + g_aprof.stack_depth - 2;
aprof_scope_t *const parent_scope = g_aprof.scopes + parent_frame->scope;
assert(parent_frame->scope >= 0);
assert(parent_frame->scope < g_aprof.num_scopes);
parent_scope->frame.duration_children += frame_duration;
}
g_aprof.stack_depth--;
}
if (g_aprof.events_write == g_aprof.events_last_frame)
++g_aprof.current_frame_wraparounds;
}
void aprof_scope_frame( void ) {
assert(g_aprof.stack_depth == 0);
for (int i = 0; i < g_aprof.num_scopes; ++i) {
aprof_scope_t *const scope = g_aprof.scopes + i;
memset(&scope->frame, 0, sizeof(scope->frame));
}
uint32_t aprof_scope_frame( void ) {
const uint64_t now = aprof_time_now_ns();
const uint32_t previous_frame = g_aprof.events_last_frame;
g_aprof.events_last_frame = g_aprof.events_write;
g_aprof.events[g_aprof.events_write] = APROF_EVENT_MAKE(APROF_EVENT_FRAME_BOUNDARY, 0, now);
g_aprof.events_write = (g_aprof.events_write + 1) & APROF_EVENT_BUFFER_SIZE_MASK;
g_aprof.current_frame_wraparounds = 0;
return previous_frame;
}
#endif

57
ref/vk/r_slows.c Normal file
View File

@ -0,0 +1,57 @@
#include "r_slows.h"
#include "vk_light.h" // For stats
#include "shaders/ray_interop.h" // stats: struct LightCluster
#include "vk_overlay.h"
#include "vk_framectl.h"
#include "profiler.h"
#define MAX_FRAMES_HISTORY 256
static struct {
float frame_times[MAX_FRAMES_HISTORY];
uint32_t frame_num;
} g_slows;
static float linearstep(float min, float max, float v) {
if (v <= min) return 0;
if (v >= max) return 1;
return (v - min) / (max - min);
}
// FIXME move this to r_speeds or something like that
void R_ShowExtendedProfilingData(uint32_t prev_frame_index) {
{
const int dirty = g_lights.stats.dirty_cells;
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
}
const uint32_t events = g_aprof.events_last_frame - prev_frame_index;
const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - APROF_EVENT_TIMESTAMP(g_aprof.events[prev_frame_index]);
const float frame_time = delta_ns / 1e6;
gEngine.Con_NPrintf(5, "aprof events this frame: %u, wraps: %d, frame time: %.03fms (%lluns)\n", events, g_aprof.current_frame_wraparounds, frame_time, delta_ns);
g_slows.frame_times[g_slows.frame_num] = frame_time;
g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY;
const float width = (float)vk_frame.width / MAX_FRAMES_HISTORY;
for (int i = 0; i < MAX_FRAMES_HISTORY; ++i) {
const float frame_time = g_slows.frame_times[i];
CL_FillRGBA(i * width, 100, width, frame_time * 2.f,
//255, 255, 255, 255);
255*linearstep(16.67, 16.67*2.f, frame_time), 255, 0, 127);
}
/* gEngine.Con_NPrintf(5, "Perf scopes:"); */
/* for (int i = 0; i < g_aprof.num_scopes; ++i) { */
/* const aprof_scope_t *const scope = g_aprof.scopes + i; */
/* gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name, */
/* scope->frame.count, */
/* scope->frame.duration / 1e6, */
/* (scope->frame.duration / 1e6) / scope->frame.count, */
/* (scope->frame.duration - scope->frame.duration_children) / 1e6, */
/* (scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count); */
/* } */
}

4
ref/vk/r_slows.h Normal file
View File

@ -0,0 +1,4 @@
#pragma once
#include <stdint.h>
void R_ShowExtendedProfilingData(uint32_t prev_frame_event_index);

View File

@ -15,6 +15,7 @@
#include "shaders/ray_interop.h" // stats: struct LightCluster
#include "profiler.h"
#include "r_slows.h"
#include "eiface.h" // ARRAYSIZE
@ -185,33 +186,16 @@ static void updateGamma( void ) {
}
}
// FIXME move this to r_speeds or something like that
static void showProfilingData( void ) {
{
const int dirty = g_lights.stats.dirty_cells;
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
}
gEngine.Con_NPrintf(5, "Perf scopes:");
for (int i = 0; i < g_aprof.num_scopes; ++i) {
const aprof_scope_t *const scope = g_aprof.scopes + i;
gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name,
scope->frame.count,
scope->frame.duration / 1e6,
(scope->frame.duration / 1e6) / scope->frame.count,
(scope->frame.duration - scope->frame.duration_children) / 1e6,
(scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count);
}
}
void R_BeginFrame( qboolean clearScene ) {
ASSERT(g_frame.current.phase == Phase_Submitted || g_frame.current.phase == Phase_Idle);
const int prev_frame_index = g_frame.current.index % MAX_CONCURRENT_FRAMES;
g_frame.current.index = (g_frame.current.index + 1) % MAX_CONCURRENT_FRAMES;
const VkCommandBuffer cmdbuf = vk_frame.cmdbuf = g_frame.command.buffers[g_frame.current.index];
showProfilingData();
aprof_scope_frame();
{
const uint32_t prev_frame_event_index = aprof_scope_frame();
R_ShowExtendedProfilingData(prev_frame_event_index);
}
APROF_SCOPE_BEGIN(frame);
APROF_SCOPE_BEGIN(begin_frame);
@ -349,10 +333,10 @@ static void submit( VkCommandBuffer cmdbuf, qboolean wait ) {
XVK_CHECK(vkWaitForFences(vk_core.device, 1, g_frame.fence_done + g_frame.current.index, VK_TRUE, INT64_MAX));
APROF_SCOPE_END(frame_gpu_wait);
if (vk_core.debug) {
// FIXME more scopes
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
}
/* if (vk_core.debug) { */
/* // FIXME more scopes */
/* XVK_CHECK(vkQueueWaitIdle(vk_core.queue)); */
/* } */
g_frame.current.phase = Phase_Idle;
}
}