mirror of
https://github.com/w23/xash3d-fwgs
synced 2024-12-05 00:11:05 +01:00
vk: rt: extract raw shader clock values
also add observations to the shader, what values do we see
This commit is contained in:
parent
1fcd64b65c
commit
cbdbda3549
@ -1011,3 +1011,16 @@ This would need the same as above, plus:
|
||||
- A: probably should still do it on GPU lol
|
||||
|
||||
This would also allow passing arbitrary per-pixel data from shaders, which would make shader debugging much much easier.
|
||||
|
||||
# 2023-12-07 E343
|
||||
## What do we really need for shader profiling
|
||||
### Optimizing polygon light sampling
|
||||
- Per-pixel numbers:
|
||||
- Total shader time
|
||||
- Sampling selection time (Σ)
|
||||
- Selecting lights to sample (+count)
|
||||
- Selecting light point to sample
|
||||
- Vertices count
|
||||
- Ray tracing time (Σ, +count)
|
||||
- Aggregate numbers:
|
||||
- TODO: what does VK_KHR_performance_query give us? Regs usage, etc.
|
||||
|
@ -1,3 +1,9 @@
|
||||
# 2023-12-07 E343
|
||||
- [x] extract raw shader clock
|
||||
- [ ] display times as scopes somewhere
|
||||
- [ ] extract sampling times
|
||||
- [ ] extract ray times
|
||||
|
||||
# 2023-12-05 E342
|
||||
- [x] tone down the specular indirect blur
|
||||
- [-] try func_wall static light opt, #687
|
||||
|
@ -21,13 +21,15 @@
|
||||
#define vec3 vec3_t
|
||||
#define vec4 vec4_t
|
||||
#define mat4 matrix4x4
|
||||
typedef int ivec3[3];
|
||||
typedef int ivec2[2];
|
||||
typedef int32_t ivec3[3];
|
||||
typedef int32_t ivec2[2];
|
||||
typedef uint32_t uvec2[2];
|
||||
typedef uint32_t uvec3[3];
|
||||
typedef uint32_t uvec4[4];
|
||||
#define TOKENPASTE(x, y) x ## y
|
||||
#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
|
||||
#define PAD(x) float TOKENPASTE2(pad_, __LINE__)[x];
|
||||
#define STRUCT struct
|
||||
|
||||
enum {
|
||||
#define DECLARE_SPECIALIZATION_CONSTANT(index, type, name, default_value) \
|
||||
SPEC_##name##_INDEX = index,
|
||||
@ -207,6 +209,10 @@ struct UniformBuffer {
|
||||
uint debug_display_only;
|
||||
};
|
||||
|
||||
// One profiling record: four uvec4 slots (16 32-bit words in total).
// In the code visible here only data[0] is used: the polygon light shader stores
// uvec4(time_begin, time_end) into it, and dumpProfilingData() reads it back as
// two 64-bit values (words 0,1 = begin low/high; words 2,3 = end low/high).
struct ProfilingStruct {
|
||||
uvec4 data[4];
|
||||
};
|
||||
|
||||
#undef PAD
|
||||
#undef STRUCT
|
||||
|
||||
|
@ -1,6 +1,12 @@
|
||||
#extension GL_ARB_shader_clock: enable
|
||||
#extension GL_EXT_shader_realtime_clock: enable
|
||||
#define PROF_USE_REALTIME
|
||||
#ifdef PROF_USE_REALTIME
|
||||
#extension GL_ARB_gpu_shader_int64: enable
|
||||
#extension GL_EXT_shader_realtime_clock: enable
|
||||
#else
|
||||
#extension GL_ARB_shader_clock: enable
|
||||
#endif
|
||||
|
||||
|
||||
#include "utils.glsl"
|
||||
#include "noise.glsl"
|
||||
|
||||
@ -16,11 +22,25 @@ void readNormals(ivec2 uv, out vec3 geometry_normal, out vec3 shading_normal) {
|
||||
shading_normal = normalDecode(n.zw);
|
||||
}
|
||||
|
||||
//#define timeNow clockRealtimeEXT
|
||||
#define timeNow clockARB
|
||||
#ifdef PROF_USE_REALTIME
|
||||
// On mesa+amdgpu there's a clear gradient: pixels on top of screen take 2-3x longer to compute than bottom ones. Also,
|
||||
// it does flicker a lot.
|
||||
// Deltas are about 30000-100000 parrots
|
||||
#define timeNow clockRealtime2x32EXT
|
||||
#else
|
||||
// clockARB doesn't give directly usable time values on mesa+amdgpu
|
||||
// even deltas between them are not meaningful enough.
|
||||
// On mesa+amdgpu clockARB() values are limited to lower 20bits, and they wrap around a lot.
|
||||
// Absolute difference values are often 30-50% of the available range, so it's not that far off from wrapping around
|
||||
// multiple times, rendering the value completely useless.
|
||||
// Deltas are around 300000-500000 parrots.
|
||||
// Other than that, the values seem uniform across the screen (as compared to realtime clock, which has a clearly
|
||||
// visible gradient: top differences are larger than bottom ones).
|
||||
#define timeNow clock2x32ARB
|
||||
#endif
|
||||
|
||||
void main() {
|
||||
const uint64_t time_begin = timeNow();
|
||||
const uvec2 time_begin = timeNow();
|
||||
|
||||
#ifdef RAY_TRACE
|
||||
const vec2 uv = (gl_LaunchIDEXT.xy + .5) / gl_LaunchSizeEXT.xy * 2. - 1.;
|
||||
@ -59,20 +79,32 @@ void main() {
|
||||
vec3 diffuse = vec3(0.), specular = vec3(0.);
|
||||
computeLighting(pos + geometry_normal * .001, shading_normal, throughput, -direction, material, diffuse, specular);
|
||||
|
||||
const uint64_t time_end = timeNow();
|
||||
const uint64_t time_diff = time_end - time_begin;
|
||||
const uvec2 time_end = timeNow();
|
||||
//const uint64_t time_diff = time_end - time_begin;
|
||||
//const uint time_diff = time_begin.x - time_end.x;
|
||||
|
||||
const float time_diff_f = float(time_diff) / 1e6;////float(time_diff >> 60);// / 1e6;
|
||||
#ifdef PROF_USE_REALTIME
|
||||
const uint64_t begin64 = time_begin.x | (uint64_t(time_begin.y) << 32);
|
||||
const uint64_t end64 = time_end.x | (uint64_t(time_end.y) << 32);
|
||||
const uint64_t time_diff = end64 - begin64;
|
||||
const float time_diff_f = float(time_diff) / 1e5;
|
||||
#else
|
||||
const uint time_diff = time_begin.x - time_end.x;
|
||||
const float time_diff_f = float(time_diff & 0xfffffu) / 1e6;
|
||||
#endif
|
||||
|
||||
const uint prof_index = pix.x + pix.y * ubo.ubo.res.x;
|
||||
#if LIGHT_POINT
|
||||
imageStore(out_light_point_diffuse, pix, vec4(diffuse, time_diff_f));
|
||||
imageStore(out_light_point_specular, pix, vec4(specular, 0.f));
|
||||
//imageStore(out_light_point_profile, pix, profile);
|
||||
//prof_direct_point[prof_index].data[0] = vec4(time_begin, time_end);
|
||||
#endif
|
||||
|
||||
#if LIGHT_POLYGON
|
||||
imageStore(out_light_poly_diffuse, pix, vec4(diffuse, time_diff_f));
|
||||
imageStore(out_light_poly_specular, pix, vec4(specular, 0.f));
|
||||
//imageStore(out_light_poly_profile, pix, profile);
|
||||
prof_direct_poly.a[prof_index].data[0] = uvec4(time_begin, time_end);
|
||||
#endif
|
||||
}
|
||||
|
@ -32,6 +32,8 @@ layout(set = 0, binding = 32, std430) readonly buffer Indices { uint16_t a[]; }
|
||||
layout(set = 0, binding = 33, std430) readonly buffer Vertices { Vertex a[]; } vertices;
|
||||
|
||||
//layout(set = 0, binding = 34, rgba16f) uniform writeonly image2D out_light_point_profile;
|
||||
//layout(set = 0, binding = 34, std430) writeonly buffer Profiling { ProfilingStruct prof_direct_point[]; };
|
||||
|
||||
|
||||
#define RAY_QUERY
|
||||
#define LIGHT_POINT 1
|
||||
|
@ -31,7 +31,8 @@ layout(set = 0, binding = 31, std430) readonly buffer Kusochki { Kusok a[]; } ku
|
||||
layout(set = 0, binding = 32, std430) readonly buffer Indices { uint16_t a[]; } indices;
|
||||
layout(set = 0, binding = 33, std430) readonly buffer Vertices { Vertex a[]; } vertices;
|
||||
|
||||
layout(set = 0, binding = 34, rgba16f) uniform writeonly image2D out_light_poly_profile;
|
||||
//layout(set = 0, binding = 34, rgba16f) uniform writeonly image2D out_light_poly_profile;
|
||||
layout(set = 0, binding = 34, std430) writeonly restrict buffer Profiling { ProfilingStruct a[]; } prof_direct_poly;
|
||||
|
||||
#define RAY_QUERY
|
||||
#define LIGHT_POLYGON 1
|
||||
|
@ -45,7 +45,8 @@
|
||||
X(Buffer, light_grid) \
|
||||
X(Texture, textures) \
|
||||
X(Texture, skybox) \
|
||||
X(Texture, blue_noise_texture)
|
||||
X(Texture, blue_noise_texture) \
|
||||
X(Buffer, prof_direct_poly) \
|
||||
|
||||
enum {
|
||||
#define RES_ENUM(type, name) ExternalResource_##name,
|
||||
@ -80,10 +81,13 @@ static struct {
|
||||
matrix4x4 prev_inv_proj, prev_inv_view;
|
||||
|
||||
qboolean reload_pipeline;
|
||||
qboolean dump_profiling_data;
|
||||
qboolean discontinuity;
|
||||
|
||||
int max_frame_width, max_frame_height;
|
||||
|
||||
vk_buffer_t prof_direct_poly;
|
||||
|
||||
struct {
|
||||
cvar_t *rt_debug_display_only;
|
||||
uint32_t rt_debug_display_only_value;
|
||||
@ -267,6 +271,9 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a
|
||||
RES_SET_SBUFFER_FULL(indices, args->render_args->geometry_data);
|
||||
RES_SET_SBUFFER_FULL(vertices, args->render_args->geometry_data);
|
||||
|
||||
// TODO register this one properly
|
||||
RES_SET_SBUFFER_FULL(prof_direct_poly, g_rtx.prof_direct_poly);
|
||||
|
||||
// TODO move this to lights
|
||||
RES_SET_BUFFER(lights, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->metadata.offset, args->light_bindings->metadata.size);
|
||||
RES_SET_BUFFER(light_grid, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args->light_bindings->buffer, args->light_bindings->grid.offset, args->light_bindings->grid.size);
|
||||
@ -593,6 +600,35 @@ fail:
|
||||
R_VkMeatpipeDestroy(newpipe);
|
||||
}
|
||||
|
||||
static void dumpProfilingData(int w, int h) {
|
||||
FILE *f = fopen("gpuprof.dump", "w");
|
||||
for (int y = 0; y < h; ++y) {
|
||||
const struct ProfilingStruct *s = ((const struct ProfilingStruct*)(g_rtx.prof_direct_poly.mapped)) + y * g_rtx.max_frame_width;
|
||||
for (int x = 0; x < w; ++x, ++s) {
|
||||
const unsigned long long begin = s->data[0][0] | ((uint64_t)(s->data[0][1]) << 32);
|
||||
const unsigned long long end = s->data[0][2] | ((uint64_t)(s->data[0][3]) << 32);
|
||||
fprintf(f, "x=%d y=%d %016llx %016llx %llu %llu d=%llu\n", x, y, begin, end, begin, end, end - begin);
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
static void resizeResources(void) {
|
||||
// TODO resize textures/images
|
||||
|
||||
if (g_rtx.prof_direct_poly.buffer != VK_NULL_HANDLE) {
|
||||
VK_BufferDestroy(&g_rtx.prof_direct_poly);
|
||||
}
|
||||
|
||||
const uint32_t size = sizeof(struct ProfilingStruct) * g_rtx.max_frame_width * g_rtx.max_frame_height;
|
||||
const VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
const VkMemoryPropertyFlags memf = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
|
||||
if (!VK_BufferCreate("prof_direct_poly", &g_rtx.prof_direct_poly, size, usage, memf)) {
|
||||
gEngine.Host_Error("Failed to recreate prof_direct_poly");
|
||||
}
|
||||
}
|
||||
|
||||
void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
|
||||
{
|
||||
APROF_SCOPE_DECLARE_BEGIN(ray_frame_end, __FUNCTION__);
|
||||
@ -612,26 +648,40 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args)
|
||||
// if (vk_core.debug)
|
||||
// XVK_RayModel_Validate();
|
||||
|
||||
qboolean need_reload = g_rtx.reload_pipeline;
|
||||
qboolean resize = false;
|
||||
|
||||
if (g_rtx.max_frame_width < args->dst.width) {
|
||||
g_rtx.max_frame_width = ALIGN_UP(args->dst.width, 16);
|
||||
WARN("Increasing max_frame_width to %d", g_rtx.max_frame_width);
|
||||
// TODO only reload resources, no need to reload the entire pipeline
|
||||
need_reload = true;
|
||||
resize = true;
|
||||
}
|
||||
|
||||
if (g_rtx.max_frame_height < args->dst.height) {
|
||||
g_rtx.max_frame_height = ALIGN_UP(args->dst.height, 16);
|
||||
WARN("Increasing max_frame_height to %d", g_rtx.max_frame_height);
|
||||
// TODO only reload resources, no need to reload the entire pipeline
|
||||
need_reload = true;
|
||||
resize = true;
|
||||
}
|
||||
|
||||
// TODO only reload resources, no need to reload the entire pipeline
|
||||
const qboolean need_reload = g_rtx.reload_pipeline || resize;
|
||||
|
||||
if (need_reload || resize || g_rtx.dump_profiling_data) {
|
||||
WARN("Reloading RTX shaders/pipelines");
|
||||
XVK_CHECK(vkDeviceWaitIdle(vk_core.device));
|
||||
}
|
||||
|
||||
if (g_rtx.dump_profiling_data) {
|
||||
// TODO using next frame w/h is not correct, need previous w/h
|
||||
dumpProfilingData(args->dst.width, args->dst.height);
|
||||
g_rtx.dump_profiling_data = false;
|
||||
}
|
||||
|
||||
if (resize) {
|
||||
// Resize after wait idle
|
||||
resizeResources();
|
||||
}
|
||||
|
||||
if (need_reload) {
|
||||
WARN("Reloading RTX shaders/pipelines");
|
||||
XVK_CHECK(vkDeviceWaitIdle(vk_core.device));
|
||||
|
||||
reloadMainpipe();
|
||||
|
||||
g_rtx.reload_pipeline = false;
|
||||
@ -692,10 +742,14 @@ tail:
|
||||
g_rtx.discontinuity = false;
|
||||
}
|
||||
|
||||
static void reloadPipeline( void ) {
|
||||
// Handler for the "rt_debug_reload_pipelines" console command.
// Only raises the reload_pipeline flag; the reload itself is performed by
// VK_RayFrameEnd() when it sees the flag, which then clears it.
static void cmdReloadPipeline( void ) {
|
||||
g_rtx.reload_pipeline = true;
|
||||
}
|
||||
|
||||
// Handler for the "rt_debug_prof_dump" console command.
// Only raises the dump_profiling_data flag; VK_RayFrameEnd() waits for the device
// to go idle, calls dumpProfilingData() and clears the flag.
static void cmdDumpProfilingData( void ) {
|
||||
g_rtx.dump_profiling_data = true;
|
||||
}
|
||||
|
||||
qboolean VK_RayInit( void )
|
||||
{
|
||||
ASSERT(vk_core.rtx);
|
||||
@ -754,7 +808,10 @@ qboolean VK_RayInit( void )
|
||||
|
||||
RT_RayModel_Clear();
|
||||
|
||||
gEngine.Cmd_AddCommand("rt_debug_reload_pipelines", reloadPipeline, "Reload RT pipelines");
|
||||
resizeResources();
|
||||
|
||||
gEngine.Cmd_AddCommand("rt_debug_reload_pipelines", cmdReloadPipeline, "Reload RT pipelines");
|
||||
gEngine.Cmd_AddCommand("rt_debug_prof_dump", cmdDumpProfilingData, "Dump profiling data");
|
||||
|
||||
#define X(name) #name ", "
|
||||
g_rtx.debug.rt_debug_display_only = gEngine.Cvar_Get("rt_debug_display_only", "", FCVAR_GLCONFIG,
|
||||
@ -775,6 +832,7 @@ void VK_RayShutdown( void ) {
|
||||
VK_BufferDestroy(&g_ray_model_state.model_headers_buffer);
|
||||
VK_BufferDestroy(&g_ray_model_state.kusochki_buffer);
|
||||
VK_BufferDestroy(&g_rtx.uniform_buffer);
|
||||
VK_BufferDestroy(&g_rtx.prof_direct_poly);
|
||||
|
||||
RT_VkAccelShutdown();
|
||||
RT_DynamicModelShutdown();
|
||||
|
Loading…
Reference in New Issue
Block a user