xash3d-fwgs/ref/vk/profiler.h
2023-02-16 10:30:31 -08:00

156 lines
3.9 KiB
C

#pragma once
#include <stdint.h>
#include <assert.h>
#include <string.h>
// Declares the static variable that holds the id of `scope`. It starts as -1,
// an id that aprof_scope_event() ignores, until APROF_SCOPE_INIT assigns it.
#define APROF_SCOPE_DECLARE(scope) \
static aprof_scope_id_t _aprof_scope_id_##scope = -1
// Registers `scope` under scope_name and stores the id returned by aprof_scope_init().
// scope_name is expected to be static and alive for the entire duration of the program
#define APROF_SCOPE_INIT(scope, scope_name) \
_aprof_scope_id_##scope = aprof_scope_init(scope_name)
// Emits a begin event for `scope`.
#define APROF_SCOPE_BEGIN(scope) \
aprof_scope_event(_aprof_scope_id_##scope, 1)
// Two-level token pasting: the extra indirection lets arguments such as
// __LINE__ be macro-expanded before they are pasted.
#define APROF_TOKENPASTE(x, y) x ## y
#define APROF_TOKENPASTE2(x, y) APROF_TOKENPASTE(x, y)
// Emits a begin event for `scope` from the initializer of a dummy const int
// whose name is made unique with __LINE__. The expansion is a declaration, not
// a statement; presumably meant for use among the declarations at the top of a
// block -- confirm against callers.
#define APROF_SCOPE_BEGIN_EARLY(scope) \
const int APROF_TOKENPASTE2(_aprof_dummy, __LINE__) = (aprof_scope_event(_aprof_scope_id_##scope, 1), 0)
// Emits an end event for `scope`.
#define APROF_SCOPE_END(scope) \
aprof_scope_event(_aprof_scope_id_##scope, 0)
// Scope identifier: an index into g_aprof.scopes. Negative values are treated
// as invalid by aprof_scope_event().
typedef int aprof_scope_id_t;
// Registers a scope named scope_name (the pointer is stored, not copied) and
// returns its id, or -1 when APROF_MAX_SCOPES scopes are already registered.
aprof_scope_id_t aprof_scope_init(const char *scope_name);
// Records a begin (begin != 0) or end (begin == 0) event for the given scope.
// Ids outside [0, g_aprof.num_scopes) are ignored.
void aprof_scope_event(aprof_scope_id_t, int begin);
// Clears the per-frame statistics of every registered scope.
// Asserts that no scope is currently open.
void aprof_scope_frame( void );
// Returns a timestamp in nanoseconds from the platform's clock.
uint64_t aprof_time_now_ns( void );
// A registered scope together with the statistics accumulated for it since
// the last aprof_scope_frame() call.
typedef struct {
// Name pointer exactly as passed to aprof_scope_init(); the string is not copied.
const char *name;
// Per-frame statistics; zeroed by aprof_scope_frame().
struct {
// Sum of (end timestamp - begin timestamp), in ns, over this scope's end events.
uint64_t duration;
// Sum of the durations, in ns, of scopes that ended while this scope was
// directly below them on the stack.
uint64_t duration_children;
// Number of end events processed for this scope.
int count;
} frame;
} aprof_scope_t;
// Capacity of g_aprof.scopes; aprof_scope_init() returns -1 once it is reached.
#define APROF_MAX_SCOPES 256
// Capacity of g_aprof.stack; begin events arriving at this nesting depth are not pushed.
#define APROF_MAX_STACK_DEPTH 32
// One currently open scope on the profiler stack.
typedef struct {
// Id of the scope whose begin event created this entry.
aprof_scope_id_t scope;
// aprof_time_now_ns() timestamp taken when the begin event was recorded.
uint64_t time_begin;
} aprof_stack_frame_t;
// Complete profiler state: the scope registry plus the stack of open scopes.
typedef struct {
// Registered scopes; indices [0, num_scopes) are the valid scope ids.
aprof_scope_t scopes[APROF_MAX_SCOPES];
int num_scopes;
// Currently open scopes, innermost last; entries [0, stack_depth) are in use.
aprof_stack_frame_t stack[APROF_MAX_STACK_DEPTH];
int stack_depth;
// TODO event log for chrome://trace (or similar) export and analysis
} aprof_state_t;
// The profiler state instance. Its definition is emitted by this header when
// APROF_IMPLEMENT is defined before inclusion.
extern aprof_state_t g_aprof;
#if defined(APROF_IMPLEMENT)
#ifdef __linux__
#include <time.h>
// Returns the current CLOCK_MONOTONIC reading expressed in nanoseconds.
uint64_t aprof_time_now_ns( void ) {
	struct timespec ts;
	uint64_t ns;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	// Whole seconds scaled to nanoseconds, then the sub-second part.
	ns = (uint64_t)ts.tv_sec * 1000000000ull;
	ns += (uint64_t)ts.tv_nsec;
	return ns;
}
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#define WIN32_EXTRA_LEAN
#include <windows.h>
// Cached QueryPerformanceFrequency() result (counter ticks per second);
// zero until it has been queried.
static LARGE_INTEGER _aprof_frequency;

// Returns the QueryPerformanceCounter() reading converted to nanoseconds.
//
// The conversion handles whole seconds and the sub-second remainder
// separately: multiplying the raw counter by 1e9 in one go overflows 64 bits
// once the counter exceeds ~1.8e10 ticks (about half an hour at 10 MHz).
// The frequency is also queried lazily here, so calling this function before
// any aprof_scope_init() does not divide by zero.
uint64_t aprof_time_now_ns( void ) {
	LARGE_INTEGER pc;
	uint64_t ticks, freq;

	if (_aprof_frequency.QuadPart == 0)
		QueryPerformanceFrequency(&_aprof_frequency);

	QueryPerformanceCounter(&pc);
	ticks = (uint64_t)pc.QuadPart;
	freq = (uint64_t)_aprof_frequency.QuadPart;

	return (ticks / freq) * 1000000000ull + (ticks % freq) * 1000000000ull / freq;
}
#else
#error aprof is not implemented for this os
#endif
// Definition of the profiler state, zero-initialized: no registered scopes
// and an empty stack.
aprof_state_t g_aprof = {0};
// Registers a new scope and returns its id (its index in g_aprof.scopes),
// or -1 when all APROF_MAX_SCOPES slots are taken.
// Only the scope_name pointer is stored; the string itself is not copied.
aprof_scope_id_t aprof_scope_init(const char *scope_name) {
	aprof_scope_id_t new_id;

#if defined(_WIN32)
	// Query the performance counter frequency once, on first use.
	if (_aprof_frequency.QuadPart == 0)
		QueryPerformanceFrequency(&_aprof_frequency);
#endif

	if (g_aprof.num_scopes == APROF_MAX_SCOPES)
		return -1;

	new_id = g_aprof.num_scopes;
	g_aprof.scopes[new_id].name = scope_name;
	g_aprof.num_scopes = new_id + 1;
	return new_id;
}
// Records the beginning (begin != 0) or the end (begin == 0) of a scope.
//
// A begin event pushes a frame holding the scope id and the current timestamp
// onto g_aprof.stack. An end event pops the top frame, adds the elapsed time
// to the scope's per-frame duration, bumps its count, and adds the same
// elapsed time to duration_children of the scope directly below on the stack.
//
// Events for ids outside [0, g_aprof.num_scopes) are ignored, which covers
// the -1 returned by a failed aprof_scope_init().
// Begin events arriving while the stack is full are dropped. Their number is
// tracked so that the matching end events are dropped too instead of popping
// an unrelated frame.
void aprof_scope_event(aprof_scope_id_t scope_id, int begin) {
	// Begin events that were dropped because the stack was full and whose
	// end events have not been seen yet.
	static int dropped_begins = 0;

	// The timestamp is taken before any validation or bookkeeping.
	const uint64_t now = aprof_time_now_ns();

	if (scope_id < 0 || scope_id >= g_aprof.num_scopes)
		return;

	// TODO improve performance by just writing into an event array here
	// analysis should be done on-demand later
	if (begin) {
		aprof_stack_frame_t *slot;

		if (g_aprof.stack_depth >= APROF_MAX_STACK_DEPTH) {
			++dropped_begins;
			return;
		}

		slot = g_aprof.stack + g_aprof.stack_depth;
		slot->scope = scope_id;
		slot->time_begin = now;
		g_aprof.stack_depth++;
		return;
	}

	// With properly nested scopes every pending dropped begin is nested deeper
	// than anything on the stack, so this end event belongs to one of them.
	if (dropped_begins > 0) {
		--dropped_begins;
		return;
	}

	assert(g_aprof.stack_depth > 0);
	if (g_aprof.stack_depth <= 0)
		return;

	// Only form the frame pointer once the stack is known to be non-empty.
	const aprof_stack_frame_t *const frame = g_aprof.stack + g_aprof.stack_depth - 1;

	// Begin/end events are expected to be properly nested.
	assert(frame->scope == scope_id);

	aprof_scope_t *const scope = g_aprof.scopes + frame->scope;
	const uint64_t frame_duration = now - frame->time_begin;

	scope->frame.duration += frame_duration;
	scope->frame.count++;

	// Attribute the elapsed time to the enclosing scope as child time.
	if (g_aprof.stack_depth > 1) {
		const aprof_stack_frame_t *const parent_frame = frame - 1;

		assert(parent_frame->scope >= 0);
		assert(parent_frame->scope < g_aprof.num_scopes);

		g_aprof.scopes[parent_frame->scope].frame.duration_children += frame_duration;
	}

	g_aprof.stack_depth--;
}
// Marks a frame boundary: zeroes the per-frame statistics (duration,
// duration_children, count) of every registered scope.
// No scope may be open when this is called.
void aprof_scope_frame( void ) {
	aprof_scope_t *cursor = g_aprof.scopes;
	const aprof_scope_t *const end = g_aprof.scopes + g_aprof.num_scopes;

	assert(g_aprof.stack_depth == 0);

	while (cursor < end) {
		memset(&cursor->frame, 0, sizeof(cursor->frame));
		++cursor;
	}
}
#endif