From 1c9ff300a9457eb089493297ffc73c69a89bc181 Mon Sep 17 00:00:00 2001 From: Ivan Avdeev Date: Tue, 14 Mar 2023 11:17:49 -0700 Subject: [PATCH] vk: draw profiler scope blocks also add a couple more scopes to rendering --- ref/vk/r_slows.c | 93 ++++++++++++++++++++++++++++++++++++++++++-- ref/vk/vk_framectl.c | 6 +-- ref/vk/vk_render.c | 14 +++++++ 3 files changed, 107 insertions(+), 6 deletions(-) diff --git a/ref/vk/r_slows.c b/ref/vk/r_slows.c index 7f4b0078..6f345a12 100644 --- a/ref/vk/r_slows.c +++ b/ref/vk/r_slows.c @@ -6,6 +6,9 @@ #include "profiler.h" +#include "crclib.h" // CRC32 for stable random colors +#include "xash3d_mathlib.h" // Q_min + #define MAX_FRAMES_HISTORY 256 #define TARGET_FRAME_TIME (1000.f / 60.f) @@ -14,13 +17,81 @@ static struct { uint32_t frame_num; } g_slows; - static float linearstep(float min, float max, float v) { if (v <= min) return 0; if (v >= max) return 1; return (v - min) / (max - min); } +#define P(fmt, ...) gEngine.Con_Reportf(fmt, ##__VA_ARGS__) + +static uint32_t getHash(const char *s) { + dword crc; + CRC32_Init(&crc); + CRC32_ProcessBuffer(&crc, s, Q_strlen(s)); + return CRC32_Final(crc); +} + +static void drawProfilerScopes(uint64_t frame_begin_time, float time_scale_ms, uint32_t begin, uint32_t end, int y) { +#define MAX_STACK_DEPTH 16 + const int height = 20; + + struct { + int scope_id; + uint64_t begin_ns; + } stack[MAX_STACK_DEPTH]; + int depth = 0; + int max_depth = 0; + + for (; begin != end; begin = (begin + 1) % APROF_EVENT_BUFFER_SIZE) { + const aprof_event_t event = g_aprof.events[begin]; + const int event_type = APROF_EVENT_TYPE(event); + const uint64_t timestamp_ns = APROF_EVENT_TIMESTAMP(event); + const int scope_id = APROF_EVENT_SCOPE_ID(event); + switch (event_type) { + case APROF_EVENT_SCOPE_BEGIN: { + if (depth < MAX_STACK_DEPTH) { + stack[depth].begin_ns = timestamp_ns; + stack[depth].scope_id = scope_id; + } + ++depth; + if (max_depth < depth) + max_depth = depth; + break; + } + + case APROF_EVENT_SCOPE_END: { + ASSERT(depth > 0); + --depth; + + ASSERT(stack[depth].scope_id == scope_id); + + const int width = (timestamp_ns - stack[depth].begin_ns) * 1e-6 * time_scale_ms; + const int x = (stack[depth].begin_ns - frame_begin_time) * 1e-6 * time_scale_ms; + + const int yy = y + depth * height; + const char *scope_name = g_aprof.scopes[scope_id].name; + const uint32_t hash = getHash(scope_name); + + rgba_t color = {hash >> 24, (hash>>16)&0xff, hash&0xff, 127}; + rgba_t text_color = {255-color[0], 255-color[1], 255-color[2], 255}; + CL_FillRGBA(x, yy, width, height, color[0], color[1], color[2], color[3]); + char tmp[64]; + tmp[0] = '\0'; + Q_strncpy(tmp, scope_name, Q_min(sizeof(tmp), width / 10)); + gEngine.Con_DrawString(x, yy, tmp, text_color); + break; + } + + default: + break; + } + } + + if (max_depth > MAX_STACK_DEPTH) + gEngine.Con_NPrintf(4, S_ERROR "Profiler stack overflow: reached %d, max available %d\n", max_depth, MAX_STACK_DEPTH); +} + // FIXME move this to r_speeds or something like that void R_ShowExtendedProfilingData(uint32_t prev_frame_index) { { @@ -29,7 +100,8 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index) { } const uint32_t events = g_aprof.events_last_frame - prev_frame_index; - const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - APROF_EVENT_TIMESTAMP(g_aprof.events[prev_frame_index]); + const uint64_t frame_begin_time = APROF_EVENT_TIMESTAMP(g_aprof.events[prev_frame_index]); + const unsigned long long delta_ns = APROF_EVENT_TIMESTAMP(g_aprof.events[g_aprof.events_last_frame]) - frame_begin_time; const float frame_time = delta_ns / 1e6; gEngine.Con_NPrintf(5, "aprof events this frame: %u, wraps: %d, frame time: %.03fms\n", events, g_aprof.current_frame_wraparounds, frame_time); @@ -38,6 +110,15 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index) { g_slows.frame_num = (g_slows.frame_num + 1) % MAX_FRAMES_HISTORY; const float width = (float)vk_frame.width / MAX_FRAMES_HISTORY; + const int frame_bar_y = 100; + const float frame_bar_y_scale = 2.f; // ms to pixels + + // 60fps + CL_FillRGBA(0, frame_bar_y + frame_bar_y_scale * TARGET_FRAME_TIME, vk_frame.width, 1, 0, 255, 0, 50); + + // 30fps + CL_FillRGBA(0, frame_bar_y + frame_bar_y_scale * TARGET_FRAME_TIME * 2, vk_frame.width, 1, 255, 0, 0, 50); + for (int i = 0; i < MAX_FRAMES_HISTORY; ++i) { const float frame_time = g_slows.frame_times[(g_slows.frame_num + i) % MAX_FRAMES_HISTORY]; @@ -45,7 +126,13 @@ void R_ShowExtendedProfilingData(uint32_t prev_frame_index) { const float time = linearstep(TARGET_FRAME_TIME, TARGET_FRAME_TIME*2.f, frame_time); const int red = 255 * time; const int green = 255 * (1 - time); - CL_FillRGBA(i * width, 100, width, frame_time * 2.f, red, green, 0, 127); + CL_FillRGBA(i * width, frame_bar_y, width, frame_time * frame_bar_y_scale, red, green, 0, 127); + } + + { + const float time_scale_ms = (double)vk_frame.width / (delta_ns / 1e6); + const int y = frame_bar_y + frame_bar_y_scale * TARGET_FRAME_TIME * 2; + drawProfilerScopes(frame_begin_time, time_scale_ms, prev_frame_index, g_aprof.events_last_frame, y); } /* gEngine.Con_NPrintf(5, "Perf scopes:"); */ diff --git a/ref/vk/vk_framectl.c b/ref/vk/vk_framectl.c index af428ca9..c664cfdd 100644 --- a/ref/vk/vk_framectl.c +++ b/ref/vk/vk_framectl.c @@ -11,9 +11,6 @@ #include "vk_staging.h" #include "vk_commandpool.h" -#include "vk_light.h" // For stats -#include "shaders/ray_interop.h" // stats: struct LightCluster - #include "profiler.h" #include "r_slows.h" @@ -54,6 +51,7 @@ static struct { X(render_frame, "VK_RenderFrame"); \ X(end_frame, "R_EndFrame"); \ X(frame_gpu_wait, "Wait for GPU"); \ + X(wait_for_frame_fence, "waitForFrameFence"); \ #define SCOPE_DECLARE(scope, name) APROF_SCOPE_DECLARE(scope) PROFILER_SCOPES(SCOPE_DECLARE) @@ -145,6 +143,7 @@ static VkRenderPass createRenderPass( VkFormat depth_format, qboolean ray_tracin } static void waitForFrameFence( void ) { + APROF_SCOPE_BEGIN(wait_for_frame_fence); for(qboolean loop = true; loop; ) { #define MAX_WAIT (10ull * 1000*1000*1000) const VkResult fence_result = vkWaitForFences(vk_core.device, 1, g_frame.fence_done + g_frame.current.index, VK_TRUE, MAX_WAIT); @@ -162,6 +161,7 @@ static void waitForFrameFence( void ) { } XVK_CHECK(vkResetFences(vk_core.device, 1, g_frame.fence_done + g_frame.current.index)); + APROF_SCOPE_END(wait_for_frame_fence); } static void updateGamma( void ) { diff --git a/ref/vk/vk_render.c b/ref/vk/vk_render.c index ae6d7b1d..56c9e49c 100644 --- a/ref/vk/vk_render.c +++ b/ref/vk/vk_render.c @@ -14,6 +14,7 @@ #include "vk_framectl.h" // FIXME needed for dynamic models cmdbuf #include "vk_previous_frame.h" #include "alolcator.h" +#include "profiler.h" #include "eiface.h" #include "xash3d_mathlib.h" @@ -23,6 +24,13 @@ #define MAX_UNIFORM_SLOTS (MAX_SCENE_ENTITIES * 2 /* solid + trans */ + 1) +#define PROFILER_SCOPES(X) \ + X(renderbegin, "VK_RenderBegin"); \ + +#define SCOPE_DECLARE(scope, name) APROF_SCOPE_DECLARE(scope) +PROFILER_SCOPES(SCOPE_DECLARE) +#undef SCOPE_DECLARE + typedef struct { matrix4x4 mvp; vec4_t color; @@ -278,6 +286,8 @@ static struct { } g_render_state; qboolean VK_RenderInit( void ) { + PROFILER_SCOPES(APROF_SCOPE_INIT); + g_render.ubo_align = Q_max(4, vk_core.physical_device.properties.limits.minUniformBufferOffsetAlignment); const uint32_t uniform_unit_size = ((sizeof(uniform_data_t) + g_render.ubo_align - 1) / g_render.ubo_align) * g_render.ubo_align; @@ -345,6 +355,8 @@ enum { }; void VK_RenderBegin( qboolean ray_tracing ) { + APROF_SCOPE_BEGIN(renderbegin); + g_render_state.uniform_data_set_mask = UNIFORM_UNSET; g_render_state.current_ubo_offset_FIXME = UINT32_MAX; memset(&g_render_state.current_uniform_data, 0, sizeof(g_render_state.current_uniform_data)); @@ -358,6 +370,8 @@ void VK_RenderBegin( qboolean ray_tracing ) { if (ray_tracing) VK_RayFrameBegin(); + + APROF_SCOPE_END(renderbegin); } // Vulkan has Y pointing down, and z should end up in (0, 1)