vk: profiler: add a way to add arbitrary metrics to profiler
This commit is contained in:
parent
576b4163b9
commit
e0e9305628
|
@ -1,6 +1,4 @@
|
|||
#include "r_speeds.h"
|
||||
#include "vk_light.h" // For stats
|
||||
#include "shaders/ray_interop.h" // stats: struct LightCluster
|
||||
#include "vk_overlay.h"
|
||||
#include "vk_framectl.h"
|
||||
#include "vk_cvar.h"
|
||||
|
@ -12,6 +10,7 @@
|
|||
|
||||
#define MAX_SPEEDS_MESSAGE 1024
|
||||
#define MAX_FRAMES_HISTORY 256
|
||||
#define MAX_SPEEDS_METRICS 32
|
||||
#define TARGET_FRAME_TIME (1000.f / 60.f)
|
||||
|
||||
// Valid bits for `r_speeds` argument:
|
||||
|
@ -38,12 +37,15 @@ static struct {
|
|||
int glyph_width, glyph_height;
|
||||
} font_metrics;
|
||||
|
||||
r_speeds_metric_t metrics[MAX_SPEEDS_METRICS];
|
||||
int metrics_count;
|
||||
|
||||
struct {
|
||||
uint64_t ref_cpu_time;
|
||||
uint64_t ref_cpu_wait_time;
|
||||
char message[MAX_SPEEDS_MESSAGE];
|
||||
} frame;
|
||||
} g_slows;
|
||||
} g_speeds;
|
||||
|
||||
static void speedsPrintf( const char *msg, ... ) {
|
||||
va_list argptr;
|
||||
|
@ -53,7 +55,7 @@ static void speedsPrintf( const char *msg, ... ) {
|
|||
Q_vsprintf( text, msg, argptr );
|
||||
va_end( argptr );
|
||||
|
||||
Q_strncat( g_slows.frame.message, text, sizeof( g_slows.frame.message ));
|
||||
Q_strncat( g_speeds.frame.message, text, sizeof( g_speeds.frame.message ));
|
||||
}
|
||||
|
||||
static float linearstep(float min, float max, float v) {
|
||||
|
@ -82,7 +84,7 @@ static void drawTimeBar(uint64_t begin_time_ns, float time_scale_ms, int64_t beg
|
|||
// Tweak this if scope names escape the block boundaries
|
||||
char tmp[64];
|
||||
tmp[0] = '\0';
|
||||
const int glyph_width = g_slows.font_metrics.glyph_width;
|
||||
const int glyph_width = g_speeds.font_metrics.glyph_width;
|
||||
Q_snprintf(tmp, Q_min(sizeof(tmp), width / glyph_width), "%s %.3fms", label, delta_ms);
|
||||
gEngine.Con_DrawString(x, y, tmp, text_color);
|
||||
}
|
||||
|
@ -97,8 +99,8 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
|
|||
int max_depth = 0;
|
||||
|
||||
int under_waiting = 0;
|
||||
g_slows.frame.ref_cpu_time = 0;
|
||||
g_slows.frame.ref_cpu_wait_time = 0;
|
||||
g_speeds.frame.ref_cpu_time = 0;
|
||||
g_speeds.frame.ref_cpu_wait_time = 0;
|
||||
|
||||
for (; begin != end; begin = (begin + 1) % APROF_EVENT_BUFFER_SIZE) {
|
||||
const aprof_event_t event = events[begin];
|
||||
|
@ -107,8 +109,8 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
|
|||
const int scope_id = APROF_EVENT_SCOPE_ID(event);
|
||||
switch (event_type) {
|
||||
case APROF_EVENT_FRAME_BOUNDARY:
|
||||
g_slows.frame.ref_cpu_time = 0;
|
||||
g_slows.frame.ref_cpu_wait_time = 0;
|
||||
g_speeds.frame.ref_cpu_time = 0;
|
||||
g_speeds.frame.ref_cpu_wait_time = 0;
|
||||
under_waiting = 0;
|
||||
break;
|
||||
|
||||
|
@ -144,17 +146,17 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
|
|||
|
||||
// Only count top level scopes towards CPU time, and only if it's not waiting
|
||||
if (is_top_level && under_waiting == 0)
|
||||
g_slows.frame.ref_cpu_time += delta_ns;
|
||||
g_speeds.frame.ref_cpu_time += delta_ns;
|
||||
|
||||
// If this is a top level waiting scope (under any depth)
|
||||
if (under_waiting == 1) {
|
||||
// Count it towards waiting time
|
||||
g_slows.frame.ref_cpu_wait_time += delta_ns;
|
||||
g_speeds.frame.ref_cpu_wait_time += delta_ns;
|
||||
|
||||
// If this is not a top level scope, then we might count its top level parent
|
||||
// towards cpu usage time, which is not correct. Subtract this waiting time from it.
|
||||
if (!is_top_level)
|
||||
g_slows.frame.ref_cpu_time -= delta_ns;
|
||||
g_speeds.frame.ref_cpu_time -= delta_ns;
|
||||
}
|
||||
|
||||
if (scope->flags & APROF_SCOPE_FLAG_WAIT)
|
||||
|
@ -162,7 +164,7 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
|
|||
|
||||
if (draw) {
|
||||
const rgba_t color = {hash >> 24, (hash>>16)&0xff, hash&0xff, 127};
|
||||
const int bar_height = g_slows.font_metrics.glyph_height;
|
||||
const int bar_height = g_speeds.font_metrics.glyph_height;
|
||||
drawTimeBar(begin_time, time_scale_ms, stack[depth].begin_ns, timestamp_ns, y + depth * bar_height, bar_height, scope->name, color);
|
||||
}
|
||||
break;
|
||||
|
@ -178,21 +180,21 @@ static void drawProfilerScopes(int draw, const aprof_event_t *events, uint64_t b
|
|||
}
|
||||
|
||||
static void handlePause( uint32_t prev_frame_index ) {
|
||||
if (!g_slows.pause_requested || g_slows.paused_events)
|
||||
if (!g_speeds.pause_requested || g_speeds.paused_events)
|
||||
return;
|
||||
|
||||
const uint32_t frame_begin = prev_frame_index;
|
||||
const uint32_t frame_end = g_aprof.events_last_frame + 1;
|
||||
|
||||
g_slows.paused_events_count = frame_end >= frame_begin ? frame_end - frame_begin : (frame_end + APROF_EVENT_BUFFER_SIZE - frame_begin);
|
||||
g_slows.paused_events = Mem_Malloc(vk_core.pool, g_slows.paused_events_count * sizeof(g_slows.paused_events[0]));
|
||||
g_speeds.paused_events_count = frame_end >= frame_begin ? frame_end - frame_begin : (frame_end + APROF_EVENT_BUFFER_SIZE - frame_begin);
|
||||
g_speeds.paused_events = Mem_Malloc(vk_core.pool, g_speeds.paused_events_count * sizeof(g_speeds.paused_events[0]));
|
||||
|
||||
if (frame_end >= frame_begin) {
|
||||
memcpy(g_slows.paused_events, g_aprof.events + frame_begin, g_slows.paused_events_count * sizeof(g_slows.paused_events[0]));
|
||||
memcpy(g_speeds.paused_events, g_aprof.events + frame_begin, g_speeds.paused_events_count * sizeof(g_speeds.paused_events[0]));
|
||||
} else {
|
||||
const int first_chunk = (APROF_EVENT_BUFFER_SIZE - frame_begin) * sizeof(g_slows.paused_events[0]);
|
||||
memcpy(g_slows.paused_events, g_aprof.events + frame_begin, first_chunk);
|
||||
memcpy(g_slows.paused_events + first_chunk, g_aprof.events, frame_end * sizeof(g_slows.paused_events[0]));
|
||||
const int first_chunk = (APROF_EVENT_BUFFER_SIZE - frame_begin) * sizeof(g_speeds.paused_events[0]);
|
||||
memcpy(g_speeds.paused_events, g_aprof.events + frame_begin, first_chunk);
|
||||
memcpy(g_speeds.paused_events + first_chunk, g_aprof.events, frame_end * sizeof(g_speeds.paused_events[0]));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -206,7 +208,7 @@ static int drawFrameTimeGraph( const int frame_bar_y, const float frame_bar_y_sc
|
|||
CL_FillRGBA(0, frame_bar_y + frame_bar_y_scale * TARGET_FRAME_TIME * 2, vk_frame.width, 1, 255, 0, 0, 50);
|
||||
|
||||
for (int i = 0; i < MAX_FRAMES_HISTORY; ++i) {
|
||||
const float frame_time = g_slows.frame_times[(g_slows.frame_num + i) % MAX_FRAMES_HISTORY];
|
||||
const float frame_time = g_speeds.frame_times[(g_speeds.frame_num + i) % MAX_FRAMES_HISTORY];
|
||||
|
||||
// > 60 fps => 0, 30..60 fps -> 1..0, <30fps => 1
|
||||
const float time = linearstep(TARGET_FRAME_TIME, TARGET_FRAME_TIME*2.f, frame_time);
|
||||
|
@ -237,9 +239,9 @@ static int drawFrames( int draw, uint32_t prev_frame_index, int y, const uint64_
|
|||
}
|
||||
}
|
||||
|
||||
const aprof_event_t *const events = g_slows.paused_events ? g_slows.paused_events : g_aprof.events;
|
||||
const int event_begin = g_slows.paused_events ? 0 : prev_frame_index;
|
||||
const int event_end = g_slows.paused_events ? g_slows.paused_events_count - 1 : g_aprof.events_last_frame;
|
||||
const aprof_event_t *const events = g_speeds.paused_events ? g_speeds.paused_events : g_aprof.events;
|
||||
const int event_begin = g_speeds.paused_events ? 0 : prev_frame_index;
|
||||
const int event_end = g_speeds.paused_events ? g_speeds.paused_events_count - 1 : g_aprof.events_last_frame;
|
||||
const uint64_t frame_begin_time = APROF_EVENT_TIMESTAMP(events[event_begin]);
|
||||
const uint64_t frame_end_time = APROF_EVENT_TIMESTAMP(events[event_end]);
|
||||
const uint64_t delta_ns = frame_end_time - frame_begin_time;
|
||||
|
@ -248,14 +250,21 @@ static int drawFrames( int draw, uint32_t prev_frame_index, int y, const uint64_
|
|||
|
||||
// Draw GPU last frame bar
|
||||
if (draw) {
|
||||
y += g_slows.font_metrics.glyph_height * 6;
|
||||
const int bar_height = g_slows.font_metrics.glyph_height;
|
||||
y += g_speeds.font_metrics.glyph_height * 6;
|
||||
const int bar_height = g_speeds.font_metrics.glyph_height;
|
||||
const rgba_t color = {255, 255, 0, 127};
|
||||
drawTimeBar(frame_begin_time, time_scale_ms, gpu_frame_begin_ns, gpu_frame_end_ns, y, bar_height, "GPU TIME", color);
|
||||
}
|
||||
return y;
|
||||
}
|
||||
|
||||
static void printMetrics( void ) {
|
||||
for (int i = 0; i < g_speeds.metrics_count; ++i) {
|
||||
const r_speeds_metric_t *const metric = g_speeds.metrics + i;
|
||||
speedsPrintf("%s: %d%s\n", metric->name, metric->value, metric->unit);
|
||||
}
|
||||
}
|
||||
|
||||
static void getCurrentFontMetrics(void) {
|
||||
// hidpi scaling
|
||||
float scale = gEngine.pfnGetCvarFloat("con_fontscale");
|
||||
|
@ -264,14 +273,14 @@ static void getCurrentFontMetrics(void) {
|
|||
|
||||
// TODO these numbers are mostly fine for the "default" font. Unfortunately
|
||||
// we don't have any access to real font metrics from here, ref_api_t doesn't give us anything about fonts. ;_;
|
||||
g_slows.font_metrics.glyph_width = 8 * scale;
|
||||
g_slows.font_metrics.glyph_height = 20 * scale;
|
||||
g_speeds.font_metrics.glyph_width = 8 * scale;
|
||||
g_speeds.font_metrics.glyph_height = 20 * scale;
|
||||
}
|
||||
|
||||
void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_begin_ns, uint64_t gpu_frame_end_ns) {
|
||||
APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__);
|
||||
|
||||
g_slows.frame.message[0] = '\0';
|
||||
g_speeds.frame.message[0] = '\0';
|
||||
|
||||
const uint32_t speeds_bits = r_speeds->value;
|
||||
|
||||
|
@ -283,8 +292,8 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
|
|||
const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - frame_begin_time;
|
||||
const float frame_time = delta_ns * 1e-6;
|
||||
|
||||
g_slows.frame_times[g_slows.frame_num] = frame_time;
|
||||
g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY;
|
||||
g_speeds.frame_times[g_speeds.frame_num] = frame_time;
|
||||
g_speeds.frame_num = (g_speeds.frame_num + 1) % MAX_FRAMES_HISTORY;
|
||||
|
||||
handlePause( prev_frame_index );
|
||||
|
||||
|
@ -292,7 +301,6 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
|
|||
getCurrentFontMetrics();
|
||||
int y = 100;
|
||||
const float frame_bar_y_scale = 2.f; // ms to pixels (somehow)
|
||||
//
|
||||
const int draw = speeds_bits & SPEEDS_BIT_FRAME;
|
||||
if (draw)
|
||||
y = drawFrameTimeGraph( y, frame_bar_y_scale ) + 20;
|
||||
|
@ -303,26 +311,26 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
|
|||
if (speeds_bits & SPEEDS_BIT_SIMPLE) {
|
||||
const uint64_t gpu_time_ns = gpu_frame_end_ns - gpu_frame_begin_ns;
|
||||
speedsPrintf("frame: %.03fms GPU: %.03fms\n", frame_time, gpu_time_ns * 1e-6);
|
||||
speedsPrintf(" (ref) CPU: %.03fms wait: %.03fms\n", g_slows.frame.ref_cpu_time * 1e-6, g_slows.frame.ref_cpu_wait_time * 1e-6);
|
||||
speedsPrintf(" (ref) CPU: %.03fms wait: %.03fms\n", g_speeds.frame.ref_cpu_time * 1e-6, g_speeds.frame.ref_cpu_wait_time * 1e-6);
|
||||
}
|
||||
|
||||
if (speeds_bits & SPEEDS_BIT_STATS) {
|
||||
const int dirty = g_lights.stats.dirty_cells;
|
||||
speedsPrintf("profiler events: %u, wraps: %d\n", events, g_aprof.current_frame_wraparounds, frame_time);
|
||||
speedsPrintf("Dirty light cells: %d\n\tsize = %dKiB, ranges = %d\n", dirty, (int)(dirty * sizeof(struct LightCluster) / 1024), g_lights.stats.ranges_uploaded);
|
||||
|
||||
printMetrics();
|
||||
}
|
||||
|
||||
APROF_SCOPE_END(function);
|
||||
}
|
||||
|
||||
static void togglePause( void ) {
|
||||
if (g_slows.paused_events) {
|
||||
Mem_Free(g_slows.paused_events);
|
||||
g_slows.paused_events = NULL;
|
||||
g_slows.paused_events_count = 0;
|
||||
g_slows.pause_requested = 0;
|
||||
if (g_speeds.paused_events) {
|
||||
Mem_Free(g_speeds.paused_events);
|
||||
g_speeds.paused_events = NULL;
|
||||
g_speeds.paused_events_count = 0;
|
||||
g_speeds.pause_requested = 0;
|
||||
} else {
|
||||
g_slows.pause_requested = 1;
|
||||
g_speeds.pause_requested = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -344,7 +352,17 @@ qboolean R_SpeedsMessage( char *out, size_t size )
|
|||
if( r_speeds->value <= 0 ) return false;
|
||||
if( !out || !size ) return false;
|
||||
|
||||
Q_strncpy( out, g_slows.frame.message, size );
|
||||
Q_strncpy( out, g_speeds.frame.message, size );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
r_speeds_metric_t *R_SpeedsRegisterMetric( const char *name, const char *unit ) {
|
||||
ASSERT(g_speeds.metrics_count < MAX_SPEEDS_METRICS);
|
||||
|
||||
r_speeds_metric_t *metric = g_speeds.metrics + (g_speeds.metrics_count++);
|
||||
metric->name = name;
|
||||
metric->unit = unit;
|
||||
|
||||
return metric;
|
||||
}
|
||||
|
|
|
@ -8,3 +8,12 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index, uint64_t gpu_frame_b
|
|||
|
||||
// Called from the engine into ref_api to get the latest speeds info
|
||||
qboolean R_SpeedsMessage( char *out, size_t size );
|
||||
|
||||
typedef struct {
|
||||
int value;
|
||||
const char *name;
|
||||
const char *unit;
|
||||
// int low_watermark, high_watermark;
|
||||
} r_speeds_metric_t;
|
||||
|
||||
r_speeds_metric_t *R_SpeedsRegisterMetric( const char *name, const char *unit );
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "bitarray.h"
|
||||
#include "profiler.h"
|
||||
#include "vk_staging.h"
|
||||
#include "r_speeds.h"
|
||||
|
||||
#include "mod_local.h"
|
||||
#include "xash3d_mathlib.h"
|
||||
|
@ -63,6 +64,12 @@ static struct {
|
|||
bit_array_t visited_cells;
|
||||
|
||||
uint32_t frame_sequence;
|
||||
|
||||
struct {
|
||||
r_speeds_metric_t *dirty_cells;
|
||||
r_speeds_metric_t *dirty_cells_size;
|
||||
r_speeds_metric_t *ranges_uploaded;
|
||||
} stats;
|
||||
} g_lights_;
|
||||
|
||||
static struct {
|
||||
|
@ -95,6 +102,10 @@ qboolean VK_LightsInit( void ) {
|
|||
return false;
|
||||
}
|
||||
|
||||
g_lights_.stats.dirty_cells = R_SpeedsRegisterMetric("lights_dirty_cells", "");
|
||||
g_lights_.stats.dirty_cells_size = R_SpeedsRegisterMetric("lights_dirty_cells_size", "KiB");
|
||||
g_lights_.stats.ranges_uploaded = R_SpeedsRegisterMetric("lights_ranges_uploaded", "");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -519,7 +530,7 @@ static qboolean addSurfaceLightToCell( int cell_index, int polygon_light_index )
|
|||
|
||||
cluster->polygons[cluster->num_polygons++] = polygon_light_index;
|
||||
if (cluster->frame_sequence != g_lights_.frame_sequence) {
|
||||
++g_lights.stats.dirty_cells;
|
||||
++g_lights_.stats.dirty_cells->value;
|
||||
cluster->frame_sequence = g_lights_.frame_sequence;
|
||||
}
|
||||
return true;
|
||||
|
@ -538,7 +549,7 @@ static qboolean addLightToCell( int cell_index, int light_index ) {
|
|||
cluster->point_lights[cluster->num_point_lights++] = light_index;
|
||||
|
||||
if (cluster->frame_sequence != g_lights_.frame_sequence) {
|
||||
++g_lights.stats.dirty_cells;
|
||||
++g_lights_.stats.dirty_cells->value;
|
||||
cluster->frame_sequence = g_lights_.frame_sequence;
|
||||
}
|
||||
return true;
|
||||
|
@ -936,7 +947,7 @@ void RT_LightsLoadEnd( void ) {
|
|||
}
|
||||
}
|
||||
|
||||
g_lights.stats.dirty_cells = g_lights.map.grid_cells;
|
||||
g_lights_.stats.dirty_cells->value = g_lights.map.grid_cells;
|
||||
}
|
||||
|
||||
qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id ) {
|
||||
|
@ -1123,7 +1134,8 @@ void RT_LightsFrameBegin( void ) {
|
|||
g_lights_.num_point_lights = g_lights_.num_static.point_lights;
|
||||
g_lights_.num_polygon_vertices = g_lights_.num_static.polygon_vertices;
|
||||
|
||||
g_lights.stats.dirty_cells = 0;
|
||||
g_lights_.stats.dirty_cells->value = 0;
|
||||
g_lights_.stats.dirty_cells_size->value = 0;
|
||||
|
||||
for (int i = 0; i < g_lights.map.grid_cells; ++i) {
|
||||
vk_lights_cell_t *const cell = g_lights.cells + i;
|
||||
|
@ -1161,13 +1173,13 @@ static void uploadGridRange( int begin, int end ) {
|
|||
|
||||
R_VkStagingUnlock( locked.handle );
|
||||
|
||||
g_lights.stats.ranges_uploaded++;
|
||||
g_lights_.stats.ranges_uploaded->value++;
|
||||
}
|
||||
|
||||
static void uploadGrid( void ) {
|
||||
ASSERT(g_lights.map.grid_cells <= MAX_LIGHT_CLUSTERS);
|
||||
|
||||
g_lights.stats.ranges_uploaded = 0;
|
||||
g_lights_.stats.ranges_uploaded->value = 0;
|
||||
|
||||
int begin = -1;
|
||||
for (int i = 0; i < g_lights.map.grid_cells; ++i) {
|
||||
|
@ -1353,6 +1365,8 @@ void RT_LightsFrameEnd( void ) {
|
|||
#endif
|
||||
}
|
||||
|
||||
g_lights_.stats.dirty_cells_size->value = g_lights_.stats.dirty_cells->value * sizeof(struct LightCluster) / 1024;
|
||||
|
||||
debug_dump_lights.enabled = false;
|
||||
APROF_SCOPE_END(finalize);
|
||||
}
|
||||
|
|
|
@ -59,11 +59,6 @@ typedef struct {
|
|||
} map;
|
||||
|
||||
vk_lights_cell_t cells[MAX_LIGHT_CLUSTERS];
|
||||
|
||||
struct {
|
||||
int dirty_cells;
|
||||
int ranges_uploaded;
|
||||
} stats;
|
||||
} vk_lights_t;
|
||||
|
||||
extern vk_lights_t g_lights;
|
||||
|
|
Loading…
Reference in New Issue