vk: profiler: add a way to add arbitrary metrics to profiler

This commit is contained in:
Ivan Avdeev 2023-03-23 09:47:17 -07:00 committed by Ivan Avdeev
parent 576b4163b9
commit e0e9305628
4 changed files with 89 additions and 53 deletions

View File

@ -1,6 +1,4 @@
#include "r_speeds.h"
#include "vk_light.h" // For stats
#include "shaders/ray_interop.h" // stats: struct LightCluster
#include "vk_overlay.h"
#include "vk_framectl.h"
#include "vk_cvar.h"
@ -12,6 +10,7 @@
#define MAX_SPEEDS_MESSAGE 1024
#define MAX_FRAMES_HISTORY 256
#define MAX_SPEEDS_METRICS 32
#define TARGET_FRAME_TIME (1000.f / 60.f)
// Valid bits for `r_speeds` argument:
@ -38,12 +37,15 @@ static struct {
int glyph_width, glyph_height;
} font_metrics;
r_speeds_metric_t metrics[MAX_SPEEDS_METRICS];
int metrics_count;
struct {
uint64_t ref_cpu_time;
uint64_t ref_cpu_wait_time;
char message[MAX_SPEEDS_MESSAGE];
} frame;
} g_slows;
} g_speeds;
static void speedsPrintf( const char *msg, ... ) {
va_list argptr;
@ -53,7 +55,7 @@ static void speedsPrintf( const char *msg, ... ) {
Q_vsprintf( text, msg, argptr );
va_end( argptr );
Q_strncat( g_slows.frame.message, text, sizeof( g_slows.frame.message ));
Q_strncat( g_speeds.frame.message, text, sizeof( g_speeds.frame.message ));
}
static float linearstep(float min, float max, float v) {
@ -82,7 +84,7 @@ static void drawTimeBar(uint64_t begin_time_ns, float time_scale_ms, int64_t beg
// Tweak this if scope names escape the block boundaries
char tmp[64];
tmp[0] = '\0';
const int glyph_width = g_slows.font_metrics.glyph_width;
const int glyph_width = g_speeds.font_metrics.glyph_width;
Q_snprintf(tmp, Q_min(sizeof(tmp), width / glyph_width), "%s %.3fms", label, delta_ms);
gEngine.Con_DrawString(x, y, tmp, text_color);
}
@ -97,8 +99,8 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
int max_depth = 0;
int under_waiting = 0;
g_slows.frame.ref_cpu_time = 0;
g_slows.frame.ref_cpu_wait_time = 0;
g_speeds.frame.ref_cpu_time = 0;
g_speeds.frame.ref_cpu_wait_time = 0;
for (; begin != end; begin = (begin + 1) % APROF_EVENT_BUFFER_SIZE) {
const aprof_event_t event = events[begin];
@ -107,8 +109,8 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
const int scope_id = APROF_EVENT_SCOPE_ID(event);
switch (event_type) {
case APROF_EVENT_FRAME_BOUNDARY:
g_slows.frame.ref_cpu_time = 0;
g_slows.frame.ref_cpu_wait_time = 0;
g_speeds.frame.ref_cpu_time = 0;
g_speeds.frame.ref_cpu_wait_time = 0;
under_waiting = 0;
break;
@ -144,17 +146,17 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
// Only count top level scopes towards CPU time, and only if it's not waiting
if (is_top_level && under_waiting == 0)
g_slows.frame.ref_cpu_time += delta_ns;
g_speeds.frame.ref_cpu_time += delta_ns;
// If this is a top level waiting scope (under any depth)
if (under_waiting == 1) {
// Count it towards waiting time
g_slows.frame.ref_cpu_wait_time += delta_ns;
g_speeds.frame.ref_cpu_wait_time += delta_ns;
// If this is not a top level scope, then we might count its top level parent
// towards cpu usage time, which is not correct. Subtract this waiting time from it.
if (!is_top_level)
g_slows.frame.ref_cpu_time -= delta_ns;
g_speeds.frame.ref_cpu_time -= delta_ns;
}
if (scope->flags & APROF_SCOPE_FLAG_WAIT)
@ -162,7 +164,7 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
if (draw) {
const rgba_t color = {hash >> 24, (hash>>16)&0xff, hash&0xff, 127};
const int bar_height = g_slows.font_metrics.glyph_height;
const int bar_height = g_speeds.font_metrics.glyph_height;
drawTimeBar(begin_time, time_scale_ms, stack[depth].begin_ns, timestamp_ns, y + depth * bar_height, bar_height, scope->name, color);
}
break;
@ -178,21 +180,21 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
}
static void handlePause( uint32_t prev_frame_index ) {
if (!g_slows.pause_requested || g_slows.paused_events)
if (!g_speeds.pause_requested || g_speeds.paused_events)
return;
const uint32_t frame_begin = prev_frame_index;
const uint32_t frame_end = g_aprof.events_last_frame + 1;
g_slows.paused_events_count = frame_end >= frame_begin ? frame_end - frame_begin : (frame_end + APROF_EVENT_BUFFER_SIZE - frame_begin);
g_slows.paused_events = Mem_Malloc(vk_core.pool, g_slows.paused_events_count * sizeof(g_slows.paused_events[0]));
g_speeds.paused_events_count = frame_end >= frame_begin ? frame_end - frame_begin : (frame_end + APROF_EVENT_BUFFER_SIZE - frame_begin);
g_speeds.paused_events = Mem_Malloc(vk_core.pool, g_speeds.paused_events_count * sizeof(g_speeds.paused_events[0]));
if (frame_end >= frame_begin) {
memcpy(g_slows.paused_events, g_aprof.events + frame_begin, g_slows.paused_events_count * sizeof(g_slows.paused_events[0]));
memcpy(g_speeds.paused_events, g_aprof.events + frame_begin, g_speeds.paused_events_count * sizeof(g_speeds.paused_events[0]));
} else {
const int first_chunk = (APROF_EVENT_BUFFER_SIZE - frame_begin) * sizeof(g_slows.paused_events[0]);
memcpy(g_slows.paused_events, g_aprof.events + frame_begin, first_chunk);
memcpy(g_slows.paused_events + first_chunk, g_aprof.events, frame_end * sizeof(g_slows.paused_events[0]));
const int first_chunk = (APROF_EVENT_BUFFER_SIZE - frame_begin) * sizeof(g_speeds.paused_events[0]);
memcpy(g_speeds.paused_events, g_aprof.events + frame_begin, first_chunk);
memcpy(g_speeds.paused_events + first_chunk, g_aprof.events, frame_end * sizeof(g_speeds.paused_events[0]));
}
}
@ -206,7 +208,7 @@ static int drawFrameTimeGraph( const int frame_bar_y, const float frame_bar_y_sc
CL_FillRGBA(0, frame_bar_y + frame_bar_y_scale * TARGET_FRAME_TIME * 2, vk_frame.width, 1, 255, 0, 0, 50);
for (int i = 0; i < MAX_FRAMES_HISTORY; ++i) {
const float frame_time = g_slows.frame_times[(g_slows.frame_num + i) % MAX_FRAMES_HISTORY];
const float frame_time = g_speeds.frame_times[(g_speeds.frame_num + i) % MAX_FRAMES_HISTORY];
// > 60 fps => 0, 30..60 fps -> 1..0, <30fps => 1
const float time = linearstep(TARGET_FRAME_TIME, TARGET_FRAME_TIME*2.f, frame_time);
@ -237,9 +239,9 @@ static int drawFrames( int draw, uint32_t prev_frame_index, int y, const uint64_
}
}
const aprof_event_t *const events = g_slows.paused_events ? g_slows.paused_events : g_aprof.events;
const int event_begin = g_slows.paused_events ? 0 : prev_frame_index;
const int event_end = g_slows.paused_events ? g_slows.paused_events_count - 1 : g_aprof.events_last_frame;
const aprof_event_t *const events = g_speeds.paused_events ? g_speeds.paused_events : g_aprof.events;
const int event_begin = g_speeds.paused_events ? 0 : prev_frame_index;
const int event_end = g_speeds.paused_events ? g_speeds.paused_events_count - 1 : g_aprof.events_last_frame;
const uint64_t frame_begin_time = APROF_EVENT_TIMESTAMP(events[event_begin]);
const uint64_t frame_end_time = APROF_EVENT_TIMESTAMP(events[event_end]);
const uint64_t delta_ns = frame_end_time - frame_begin_time;
@ -248,14 +250,21 @@ static int drawFrames( int draw, uint32_t prev_frame_index, int y, const uint64_
// Draw GPU last frame bar
if (draw) {
y += g_slows.font_metrics.glyph_height * 6;
const int bar_height = g_slows.font_metrics.glyph_height;
y += g_speeds.font_metrics.glyph_height * 6;
const int bar_height = g_speeds.font_metrics.glyph_height;
const rgba_t color = {255, 255, 0, 127};
drawTimeBar(frame_begin_time, time_scale_ms, gpu_frame_begin_ns, gpu_frame_end_ns, y, bar_height, "GPU TIME", color);
}
return y;
}
static void printMetrics( void ) {
for (int i = 0; i < g_speeds.metrics_count; ++i) {
const r_speeds_metric_t *const metric = g_speeds.metrics + i;
speedsPrintf("%s: %d%s\n", metric->name, metric->value, metric->unit);
}
}
static void getCurrentFontMetrics(void) {
// hidpi scaling
float scale = gEngine.pfnGetCvarFloat("con_fontscale");
@ -264,14 +273,14 @@ static void getCurrentFontMetrics(void) {
// TODO these numbers are mostly fine for the "default" font. Unfortunately
// we don't have any access to real font metrics from here, ref_api_t doesn't give us anything about fonts. ;_;
g_slows.font_metrics.glyph_width = 8 * scale;
g_slows.font_metrics.glyph_height = 20 * scale;
g_speeds.font_metrics.glyph_width = 8 * scale;
g_speeds.font_metrics.glyph_height = 20 * scale;
}
void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_begin_ns, uint64_t gpu_frame_end_ns) {
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
g_slows.frame.message[0] = '\0';
g_speeds.frame.message[0] = '\0';
const uint32_t speeds_bits = r_speeds->value;
@ -283,8 +292,8 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - frame_begin_time;
const float frame_time = delta_ns * 1e-6;
g_slows.frame_times[g_slows.frame_num] = frame_time;
g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY;
g_speeds.frame_times[g_speeds.frame_num] = frame_time;
g_speeds.frame_num = (g_speeds.frame_num + 1) % MAX_FRAMES_HISTORY;
handlePause( prev_frame_index );
@ -292,7 +301,6 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
getCurrentFontMetrics();
int y = 100;
const float frame_bar_y_scale = 2.f; // ms to pixels (somehow)
//
const int draw = speeds_bits & SPEEDS_BIT_FRAME;
if (draw)
y = drawFrameTimeGraph( y, frame_bar_y_scale ) + 20;
@ -303,26 +311,26 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
if (speeds_bits & SPEEDS_BIT_SIMPLE) {
const uint64_t gpu_time_ns = gpu_frame_end_ns - gpu_frame_begin_ns;
speedsPrintf("frame: %.03fms GPU: %.03fms\n", frame_time, gpu_time_ns * 1e-6);
speedsPrintf(" (ref) CPU: %.03fms wait: %.03fms\n", g_slows.frame.ref_cpu_time * 1e-6, g_slows.frame.ref_cpu_wait_time * 1e-6);
speedsPrintf(" (ref) CPU: %.03fms wait: %.03fms\n", g_speeds.frame.ref_cpu_time * 1e-6, g_speeds.frame.ref_cpu_wait_time * 1e-6);
}
if (speeds_bits & SPEEDS_BIT_STATS) {
const int dirty = g_lights.stats.dirty_cells;
speedsPrintf("profiler events: %u, wraps: %d\n", events, g_aprof.current_frame_wraparounds, frame_time);
speedsPrintf("Dirty light cells: %d\n\tsize = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
printMetrics();
}
APROF_SCOPE_END(function);
}
static void togglePause( void ) {
if (g_slows.paused_events) {
Mem_Free(g_slows.paused_events);
g_slows.paused_events = NULL;
g_slows.paused_events_count = 0;
g_slows.pause_requested = 0;
if (g_speeds.paused_events) {
Mem_Free(g_speeds.paused_events);
g_speeds.paused_events = NULL;
g_speeds.paused_events_count = 0;
g_speeds.pause_requested = 0;
} else {
g_slows.pause_requested = 1;
g_speeds.pause_requested = 1;
}
}
@ -344,7 +352,17 @@ qboolean R_SpeedsMessage( char *out, size_t size )
if( r_speeds->value <= 0 ) return false;
if( !out || !size ) return false;
Q_strncpy( out, g_slows.frame.message, size );
Q_strncpy( out, g_speeds.frame.message, size );
return true;
}
r_speeds_metric_t *R_SpeedsRegisterMetric( const char *name, const char *unit ) {
ASSERT(g_speeds.metrics_count < MAX_SPEEDS_METRICS);
r_speeds_metric_t *metric = g_speeds.metrics + (g_speeds.metrics_count++);
metric->name = name;
metric->unit = unit;
return metric;
}

View File

@ -8,3 +8,12 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
// Called from the engine into ref_api to get the latest speeds info
qboolean R_SpeedsMessage( char *out, size_t size );
typedef struct {
int value;
const char *name;
const char *unit;
// int low_watermark, high_watermark;
} r_speeds_metric_t;
r_speeds_metric_t *R_SpeedsRegisterMetric( const char *name, const char *unit );

View File

@ -9,6 +9,7 @@
#include "bitarray.h"
#include "profiler.h"
#include "vk_staging.h"
#include "r_speeds.h"
#include "mod_local.h"
#include "xash3d_mathlib.h"
@ -63,6 +64,12 @@ static struct {
bit_array_t visited_cells;
uint32_t frame_sequence;
struct {
r_speeds_metric_t *dirty_cells;
r_speeds_metric_t *dirty_cells_size;
r_speeds_metric_t *ranges_uploaded;
} stats;
} g_lights_;
static struct {
@ -95,6 +102,10 @@ qboolean VK_LightsInit( void ) {
return false;
}
g_lights_.stats.dirty_cells = R_SpeedsRegisterMetric("lights_dirty_cells", "");
g_lights_.stats.dirty_cells_size = R_SpeedsRegisterMetric("lights_dirty_cells_size", "KiB");
g_lights_.stats.ranges_uploaded = R_SpeedsRegisterMetric("lights_ranges_uploaded", "");
return true;
}
@ -519,7 +530,7 @@ static qboolean addSurfaceLightToCell( int cell_index, int polygon_light_index )
cluster->polygons[cluster->num_polygons++] = polygon_light_index;
if (cluster->frame_sequence != g_lights_.frame_sequence) {
++g_lights.stats.dirty_cells;
++g_lights_.stats.dirty_cells->value;
cluster->frame_sequence = g_lights_.frame_sequence;
}
return true;
@ -538,7 +549,7 @@ static qboolean addLightToCell( int cell_index, int light_index ) {
cluster->point_lights[cluster->num_point_lights++] = light_index;
if (cluster->frame_sequence != g_lights_.frame_sequence) {
++g_lights.stats.dirty_cells;
++g_lights_.stats.dirty_cells->value;
cluster->frame_sequence = g_lights_.frame_sequence;
}
return true;
@ -936,7 +947,7 @@ void RT_LightsLoadEnd( void ) {
}
}
g_lights.stats.dirty_cells = g_lights.map.grid_cells;
g_lights_.stats.dirty_cells->value = g_lights.map.grid_cells;
}
qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id ) {
@ -1123,7 +1134,8 @@ void RT_LightsFrameBegin( void ) {
g_lights_.num_point_lights = g_lights_.num_static.point_lights;
g_lights_.num_polygon_vertices = g_lights_.num_static.polygon_vertices;
g_lights.stats.dirty_cells = 0;
g_lights_.stats.dirty_cells->value = 0;
g_lights_.stats.dirty_cells_size->value = 0;
for (int i = 0; i < g_lights.map.grid_cells; ++i) {
vk_lights_cell_t *const cell = g_lights.cells + i;
@ -1161,13 +1173,13 @@ static void uploadGridRange( int begin, int end ) {
R_VkStagingUnlock( locked.handle );
g_lights.stats.ranges_uploaded++;
g_lights_.stats.ranges_uploaded->value++;
}
static void uploadGrid( void ) {
ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);
g_lights.stats.ranges_uploaded = 0;
g_lights_.stats.ranges_uploaded->value = 0;
int begin = -1;
for (int i = 0; i < g_lights.map.grid_cells; ++i) {
@ -1353,6 +1365,8 @@ void RT_LightsFrameEnd( void ) {
#endif
}
g_lights_.stats.dirty_cells_size->value = g_lights_.stats.dirty_cells->value * sizeof(struct LightCluster) / 1024;
debug_dump_lights.enabled = false;
APROF_SCOPE_END(finalize);
}

View File

@ -59,11 +59,6 @@ typedef struct {
} map;
vk_lights_cell_t cells[MAX_LIGHT_CLUSTERS];
struct {
int dirty_cells;
int ranges_uploaded;
} stats;
} vk_lights_t;
extern vk_lights_t g_lights;