vk: start refactoring profiler
Convert direct stack manipulation to simple and cheap event writing. Draw rudimentary frame times graph. Related to #412
This commit is contained in:
parent
01de5957d8
commit
6ff146e658
|
@ -0,0 +1,29 @@
|
|||
# Frame structure wrt calls from the engine
|
||||
- (eng) SCR_UpdateScreen()
|
||||
- (eng) V_PreRender()
|
||||
- **(ref) R_BeginFrame()**
|
||||
- (eng) V_RenderView()
|
||||
- **(ref) GL_BackendStartFrame()** -- ref_gl only sets speeds string to empty here
|
||||
- (eng) loop over ref_params_t views
|
||||
- **(ref) GL_RenderFrame()**
|
||||
- (eng) ??? SV_DrawDebugTriangles()
|
||||
- **(ref) GL_BackendEndFrame()** -- ref_gl only produces speeds string here
|
||||
- (eng) V_PostRender()
|
||||
- **(ref) R_AllowFog(), R_Set2DMode(true)**
|
||||
- **(ref) R_DrawTileClear()** x N
|
||||
- (vgui) Paint() -> ???
|
||||
- (eng) SCR_RSpeeds()
|
||||
- **(ref) R_SpeedsMessage()**
|
||||
- (eng) CL_DrawString() ...
|
||||
- **(ref) GL_SetRenderMode()**
|
||||
- **(ref) RefGetParm()** for texture resolution
|
||||
- **(ref) Color4ub()**
|
||||
- **(ref) R_DrawStretchPic()**
|
||||
- (eng) SRC_DrawNetGraph()
|
||||
- **(ref) many TriApi calls** -- 2D usage of triapi. we were not ready for this (maybe track R_Set2DMode()?)
|
||||
- **(ref) R_ShowTextures()** kekw
|
||||
- **(ref) VID_ScreenShot()**
|
||||
- **(ref) R_AllowFog(true)**
|
||||
- **(ref) R_EndFrame()**
|
||||
|
||||
|
|
@ -26,19 +26,15 @@
|
|||
|
||||
typedef int aprof_scope_id_t;
|
||||
|
||||
// scope_name should be static const, and not on stack
|
||||
aprof_scope_id_t aprof_scope_init(const char *scope_name);
|
||||
void aprof_scope_event(aprof_scope_id_t, int begin);
|
||||
void aprof_scope_frame( void );
|
||||
// Returns event index for previous frame
|
||||
uint32_t aprof_scope_frame( void );
|
||||
uint64_t aprof_time_now_ns( void );
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
|
||||
struct {
|
||||
uint64_t duration;
|
||||
uint64_t duration_children;
|
||||
int count;
|
||||
} frame;
|
||||
} aprof_scope_t;
|
||||
|
||||
#define APROF_MAX_SCOPES 256
|
||||
|
@ -49,12 +45,43 @@ typedef struct {
|
|||
uint64_t time_begin;
|
||||
} aprof_stack_frame_t;
|
||||
|
||||
#define APROF_EVENT_TYPE_MASK 0x0full
|
||||
#define APROF_EVENT_TYPE_SHIFT 0
|
||||
#define APROF_EVENT_TYPE(event) (((event)&APROF_EVENT_TYPE_MASK) >> APROF_EVENT_TYPE_SHIFT)
|
||||
|
||||
#define APROF_EVENT_SCOPE_ID_MASK 0xff00ull
|
||||
#define APROF_EVENT_SCOPE_ID_SHIFT 8
|
||||
#define APROF_EVENT_SCOPE_ID(event) (((event)&APROF_EVENT_SCOPE_ID_MASK) >> APROF_EVENT_SCOPE_ID_SHIFT)
|
||||
|
||||
#define APROF_EVENT_TIMESTAMP_SHIFT 16
|
||||
#define APROF_EVENT_TIMESTAMP(event) ((event) >> APROF_EVENT_TIMESTAMP_SHIFT)
|
||||
|
||||
#define APROF_EVENT_MAKE(type, scope_id, timestamp) \
|
||||
(((type) << APROF_EVENT_TYPE_SHIFT) & APROF_EVENT_TYPE_MASK) | \
|
||||
(((scope_id) << APROF_EVENT_SCOPE_ID_SHIFT) & APROF_EVENT_SCOPE_ID_MASK) | \
|
||||
((timestamp) << APROF_EVENT_TIMESTAMP_SHIFT)
|
||||
|
||||
enum {
|
||||
APROF_EVENT_FRAME_BOUNDARY = 0,
|
||||
APROF_EVENT_SCOPE_BEGIN = 1,
|
||||
APROF_EVENT_SCOPE_END = 2,
|
||||
};
|
||||
|
||||
// MUST be power of 2
|
||||
#define APROF_EVENT_BUFFER_SIZE (1<<20)
|
||||
#define APROF_EVENT_BUFFER_SIZE_MASK (APROF_EVENT_BUFFER_SIZE - 1)
|
||||
|
||||
typedef uint64_t aprof_event_t;
|
||||
|
||||
typedef struct {
|
||||
aprof_scope_t scopes[APROF_MAX_SCOPES];
|
||||
int num_scopes;
|
||||
|
||||
aprof_stack_frame_t stack[APROF_MAX_STACK_DEPTH];
|
||||
int stack_depth;
|
||||
aprof_event_t events[APROF_EVENT_BUFFER_SIZE];
|
||||
uint32_t events_write;
|
||||
uint32_t events_last_frame;
|
||||
|
||||
int current_frame_wraparounds;
|
||||
|
||||
// TODO event log for chrome://trace (or similar) export and analysis
|
||||
} aprof_state_t;
|
||||
|
@ -104,52 +131,24 @@ void aprof_scope_event(aprof_scope_id_t scope_id, int begin) {
|
|||
if (scope_id < 0 || scope_id >= g_aprof.num_scopes)
|
||||
return;
|
||||
|
||||
// TODO improve performance by just writing into an event array here
|
||||
// analysis should be done on-demand later
|
||||
if (begin) {
|
||||
const int s = g_aprof.stack_depth;
|
||||
if (g_aprof.stack_depth == APROF_MAX_STACK_DEPTH)
|
||||
return;
|
||||
g_aprof.events[g_aprof.events_write] = APROF_EVENT_MAKE(begin?APROF_EVENT_SCOPE_BEGIN:APROF_EVENT_SCOPE_END, scope_id, now);
|
||||
g_aprof.events_write = (g_aprof.events_write + 1) & APROF_EVENT_BUFFER_SIZE_MASK;
|
||||
|
||||
g_aprof.stack[s].scope = scope_id;
|
||||
g_aprof.stack[s].time_begin = now;
|
||||
g_aprof.stack_depth++;
|
||||
} else {
|
||||
aprof_scope_t *scope;
|
||||
const aprof_stack_frame_t *const frame = g_aprof.stack + g_aprof.stack_depth - 1;
|
||||
uint64_t frame_duration;
|
||||
|
||||
assert(g_aprof.stack_depth > 0);
|
||||
if (g_aprof.stack_depth == 0)
|
||||
return;
|
||||
|
||||
assert(frame->scope == scope_id);
|
||||
|
||||
scope = g_aprof.scopes + frame->scope;
|
||||
frame_duration = now - frame->time_begin;
|
||||
scope->frame.duration += frame_duration;
|
||||
scope->frame.count++;
|
||||
|
||||
if (g_aprof.stack_depth > 1) {
|
||||
const aprof_stack_frame_t *const parent_frame = g_aprof.stack + g_aprof.stack_depth - 2;
|
||||
aprof_scope_t *const parent_scope = g_aprof.scopes + parent_frame->scope;
|
||||
|
||||
assert(parent_frame->scope >= 0);
|
||||
assert(parent_frame->scope < g_aprof.num_scopes);
|
||||
|
||||
parent_scope->frame.duration_children += frame_duration;
|
||||
}
|
||||
|
||||
g_aprof.stack_depth--;
|
||||
}
|
||||
if (g_aprof.events_write == g_aprof.events_last_frame)
|
||||
++g_aprof.current_frame_wraparounds;
|
||||
}
|
||||
|
||||
void aprof_scope_frame( void ) {
|
||||
assert(g_aprof.stack_depth == 0);
|
||||
for (int i = 0; i < g_aprof.num_scopes; ++i) {
|
||||
aprof_scope_t *const scope = g_aprof.scopes + i;
|
||||
memset(&scope->frame, 0, sizeof(scope->frame));
|
||||
}
|
||||
uint32_t aprof_scope_frame( void ) {
|
||||
const uint64_t now = aprof_time_now_ns();
|
||||
const uint32_t previous_frame = g_aprof.events_last_frame;
|
||||
|
||||
g_aprof.events_last_frame = g_aprof.events_write;
|
||||
g_aprof.events[g_aprof.events_write] = APROF_EVENT_MAKE(APROF_EVENT_FRAME_BOUNDARY, 0, now);
|
||||
g_aprof.events_write = (g_aprof.events_write + 1) & APROF_EVENT_BUFFER_SIZE_MASK;
|
||||
|
||||
g_aprof.current_frame_wraparounds = 0;
|
||||
|
||||
return previous_frame;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
#include "r_slows.h"
|
||||
#include "vk_light.h" // For stats
|
||||
#include "shaders/ray_interop.h" // stats: struct LightCluster
|
||||
#include "vk_overlay.h"
|
||||
#include "vk_framectl.h"
|
||||
|
||||
#include "profiler.h"
|
||||
|
||||
#define MAX_FRAMES_HISTORY 256
|
||||
|
||||
static struct {
|
||||
float frame_times[MAX_FRAMES_HISTORY];
|
||||
uint32_t frame_num;
|
||||
} g_slows;
|
||||
|
||||
|
||||
static float linearstep(float min, float max, float v) {
|
||||
if (v <= min) return 0;
|
||||
if (v >= max) return 1;
|
||||
return (v - min) / (max - min);
|
||||
}
|
||||
|
||||
// FIXME move this to r_speeds or something like that
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_index) {
|
||||
{
|
||||
const int dirty = g_lights.stats.dirty_cells;
|
||||
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
|
||||
}
|
||||
|
||||
const uint32_t events = g_aprof.events_last_frame - prev_frame_index;
|
||||
const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - APROF_EVENT_TIMESTAMP(g_aprof.events[prev_frame_index]);
|
||||
const float frame_time = delta_ns / 1e6;
|
||||
|
||||
gEngine.Con_NPrintf(5, "aprof events this frame: %u, wraps: %d, frame time: %.03fms (%lluns)\n", events, g_aprof.current_frame_wraparounds, frame_time, delta_ns);
|
||||
|
||||
g_slows.frame_times[g_slows.frame_num] = frame_time;
|
||||
g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY;
|
||||
|
||||
const float width = (float)vk_frame.width / MAX_FRAMES_HISTORY;
|
||||
for (int i = 0; i < MAX_FRAMES_HISTORY; ++i) {
|
||||
const float frame_time = g_slows.frame_times[i];
|
||||
CL_FillRGBA(i * width, 100, width, frame_time * 2.f,
|
||||
//255, 255, 255, 255);
|
||||
255*linearstep(16.67, 16.67*2.f, frame_time), 255, 0, 127);
|
||||
}
|
||||
|
||||
/* gEngine.Con_NPrintf(5, "Perf scopes:"); */
|
||||
/* for (int i = 0; i < g_aprof.num_scopes; ++i) { */
|
||||
/* const aprof_scope_t *const scope = g_aprof.scopes + i; */
|
||||
/* gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name, */
|
||||
/* scope->frame.count, */
|
||||
/* scope->frame.duration / 1e6, */
|
||||
/* (scope->frame.duration / 1e6) / scope->frame.count, */
|
||||
/* (scope->frame.duration - scope->frame.duration_children) / 1e6, */
|
||||
/* (scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count); */
|
||||
/* } */
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_event_index);
|
|
@ -15,6 +15,7 @@
|
|||
#include "shaders/ray_interop.h" // stats: struct LightCluster
|
||||
|
||||
#include "profiler.h"
|
||||
#include "r_slows.h"
|
||||
|
||||
#include "eiface.h" // ARRAYSIZE
|
||||
|
||||
|
@ -185,33 +186,16 @@ static void updateGamma( void ) {
|
|||
}
|
||||
}
|
||||
|
||||
// FIXME move this to r_speeds or something like that
|
||||
static void showProfilingData( void ) {
|
||||
{
|
||||
const int dirty = g_lights.stats.dirty_cells;
|
||||
gEngine.Con_NPrintf(4, "Dirty light cells: %d, size = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
|
||||
}
|
||||
|
||||
gEngine.Con_NPrintf(5, "Perf scopes:");
|
||||
for (int i = 0; i < g_aprof.num_scopes; ++i) {
|
||||
const aprof_scope_t *const scope = g_aprof.scopes + i;
|
||||
gEngine.Con_NPrintf(6 + i, "%s: c%d t%.03f(%.03f)ms s%.03f(%.03f)ms", scope->name,
|
||||
scope->frame.count,
|
||||
scope->frame.duration / 1e6,
|
||||
(scope->frame.duration / 1e6) / scope->frame.count,
|
||||
(scope->frame.duration - scope->frame.duration_children) / 1e6,
|
||||
(scope->frame.duration - scope->frame.duration_children) / 1e6 / scope->frame.count);
|
||||
}
|
||||
}
|
||||
|
||||
void R_BeginFrame( qboolean clearScene ) {
|
||||
ASSERT(g_frame.current.phase == Phase_Submitted || g_frame.current.phase == Phase_Idle);
|
||||
const int prev_frame_index = g_frame.current.index % MAX_CONCURRENT_FRAMES;
|
||||
g_frame.current.index = (g_frame.current.index + 1) % MAX_CONCURRENT_FRAMES;
|
||||
const VkCommandBuffer cmdbuf = vk_frame.cmdbuf = g_frame.command.buffers[g_frame.current.index];
|
||||
|
||||
showProfilingData();
|
||||
aprof_scope_frame();
|
||||
{
|
||||
const uint32_t prev_frame_event_index = aprof_scope_frame();
|
||||
R_ShowExtendedProfilingData(prev_frame_event_index);
|
||||
}
|
||||
|
||||
APROF_SCOPE_BEGIN(frame);
|
||||
APROF_SCOPE_BEGIN(begin_frame);
|
||||
|
@ -349,10 +333,10 @@ static void submit( VkCommandBuffer cmdbuf, qboolean wait ) {
|
|||
XVK_CHECK(vkWaitForFences(vk_core.device, 1, g_frame.fence_done + g_frame.current.index, VK_TRUE, INT64_MAX));
|
||||
APROF_SCOPE_END(frame_gpu_wait);
|
||||
|
||||
if (vk_core.debug) {
|
||||
// FIXME more scopes
|
||||
XVK_CHECK(vkQueueWaitIdle(vk_core.queue));
|
||||
}
|
||||
/* if (vk_core.debug) { */
|
||||
/* // FIXME more scopes */
|
||||
/* XVK_CHECK(vkQueueWaitIdle(vk_core.queue)); */
|
||||
/* } */
|
||||
g_frame.current.phase = Phase_Idle;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue