#411 Trace alpha masked and transparent surfaces

Tracing alpha-masked shadows is now extremely expensive: FPS is roughly cut in half :(

Will need to address it separately
This commit is contained in:
Ivan Avdeev 2023-02-03 12:22:43 -08:00 committed by GitHub
commit 18a7c61505
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 170 additions and 69 deletions

View File

@ -42,9 +42,9 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
const float stopdot = lights.m.point_lights[i].color_stopdot.a; const float stopdot = lights.m.point_lights[i].color_stopdot.a;
const vec3 dir = lights.m.point_lights[i].dir_stopdot2.xyz; const vec3 dir = lights.m.point_lights[i].dir_stopdot2.xyz;
const float stopdot2 = lights.m.point_lights[i].dir_stopdot2.a; const float stopdot2 = lights.m.point_lights[i].dir_stopdot2.a;
const bool not_environment = (lights.m.point_lights[i].environment == 0); const bool is_environment = (lights.m.point_lights[i].environment != 0);
const vec3 light_dir = not_environment ? (origin_r.xyz - P) : -dir; // TODO need to randomize sampling direction for environment soft shadow const vec3 light_dir = is_environment ? -dir : (origin_r.xyz - P); // TODO need to randomize sampling direction for environment soft shadow
const float radius = origin_r.w; const float radius = origin_r.w;
const vec3 light_dir_norm = normalize(light_dir); const vec3 light_dir_norm = normalize(light_dir);
@ -64,7 +64,9 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
float light_dist = 1e5; // TODO this is supposedly not the right way to do shadows for environment lights.m. qrad checks for hitting SURF_SKY, and maybe we should too? float light_dist = 1e5; // TODO this is supposedly not the right way to do shadows for environment lights.m. qrad checks for hitting SURF_SKY, and maybe we should too?
const float d2 = dot(light_dir, light_dir); const float d2 = dot(light_dir, light_dir);
const float r2 = origin_r.w * origin_r.w; const float r2 = origin_r.w * origin_r.w;
if (not_environment) { if (is_environment) {
color *= 2; // TODO WHY?
} else {
if (radius < 1e-3) if (radius < 1e-3)
continue; continue;
@ -86,8 +88,6 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
//const float pdf = TWO_PI / asin(radius / dist); //const float pdf = TWO_PI / asin(radius / dist);
const float pdf = 1. / ((1. - sqrt(d2 - r2) / dist) * spot_attenuation); const float pdf = 1. / ((1. - sqrt(d2 - r2) / dist) * spot_attenuation);
color /= pdf; color /= pdf;
} else {
color *= 2;
} }
// if (dot(color,color) < color_culling_threshold) // if (dot(color,color) < color_culling_threshold)
@ -104,14 +104,8 @@ void computePointLights(vec3 P, vec3 N, uint cluster_index, vec3 throughput, vec
continue; continue;
// FIXME split environment and other lights // FIXME split environment and other lights
if (not_environment) { if (shadowed(P, light_dir_norm, light_dist + shadow_offset_fudge, is_environment))
if (shadowed(P, light_dir_norm, light_dist + shadow_offset_fudge)) continue;
continue;
} else {
// for environment light check that we've hit SURF_SKY
if (shadowedSky(P, light_dir_norm, light_dist + shadow_offset_fudge))
continue;
}
diffuse += ldiffuse; diffuse += ldiffuse;
specular += lspecular; specular += lspecular;

View File

@ -1,8 +1,11 @@
#ifndef LIGHT_COMMON_GLSL_INCLUDED #ifndef LIGHT_COMMON_GLSL_INCLUDED
#define LIGHT_COMMON_GLSL_INCLUDED #define LIGHT_COMMON_GLSL_INCLUDED
#extension GL_EXT_nonuniform_qualifier : enable
#include "ray_kusochki.glsl" #include "ray_kusochki.glsl"
layout(set = 0, binding = 6) uniform sampler2D textures[MAX_TEXTURES];
#ifdef RAY_TRACE2 #ifdef RAY_TRACE2
#include "ray_shadow_interface.glsl" #include "ray_shadow_interface.glsl"
layout(location = PAYLOAD_LOCATION_SHADOW) rayPayloadEXT RayPayloadShadow payload_shadow; layout(location = PAYLOAD_LOCATION_SHADOW) rayPayloadEXT RayPayloadShadow payload_shadow;
@ -20,7 +23,53 @@ uint traceShadowRay(vec3 pos, vec3 dir, float dist, uint flags) {
} }
#endif #endif
bool shadowed(vec3 pos, vec3 dir, float dist) { #if defined(RAY_QUERY)
// Shadow test against alpha-tested geometry only (instances selected by GEOMETRY_BIT_ALPHA_TEST).
// Casts a ray query from `pos` along `dir` over the [0, dist] interval. Every candidate triangle has
// its base color texture sampled at the candidate hit point, and the candidate is confirmed as an
// occluder only when the sampled alpha reaches the mask threshold.
// Returns true when the query finishes with a committed triangle intersection.
bool shadowTestAlphaMask(vec3 pos, vec3 dir, float dist) {
	const float alpha_mask_threshold = .1f;

	// gl_RayFlagsNoOpaqueEXT is left disabled here.
	const uint ray_flags = 0
		| gl_RayFlagsCullFrontFacingTrianglesEXT
		| gl_RayFlagsTerminateOnFirstHitEXT
		;

	rayQueryEXT rq;
	rayQueryInitializeEXT(rq, tlas, ray_flags, GEOMETRY_BIT_ALPHA_TEST, pos, 0., dir, dist);

	while (rayQueryProceedEXT(rq)) {
		// Alpha test, takes 10ms (original author's measurement).
		// TODO look into other, possibly more efficient, ways of doing the alpha test:
		// 1. A separate ray query for alpha masked geometry. Reason: here we might accidentally pay for the
		//    expensive texture sampling on geometry that ends up invisible (i.e. behind walls). Shader threads
		//    congruence matters too: a separate pass would be doing the same thing for every invocation.
		// 2. Same as above, but with a completely independent TLAS, so that there's no need to
		//    mask-check geometry for opaque-vs-alpha.

		// Candidate (non-committed) intersection -> kusok
		const uint candidate_instance_offset = uint(rayQueryGetIntersectionInstanceCustomIndexEXT(rq, false));
		const uint candidate_geometry = uint(rayQueryGetIntersectionGeometryIndexEXT(rq, false));
		const Kusok kusok = getKusok(candidate_instance_offset + candidate_geometry);

		// Three consecutive indices describe the candidate triangle
		const uint candidate_triangle = uint(rayQueryGetIntersectionPrimitiveIndexEXT(rq, false));
		const uint triangle_first_index = kusok.index_offset + candidate_triangle * 3;

		// Interpolate texture coordinates of the triangle corners at the candidate hit point
		const vec2 uv = baryMix(
			getVertex(uint(getIndex(triangle_first_index + 0)) + kusok.vertex_offset).gl_tc,
			getVertex(uint(getIndex(triangle_first_index + 1)) + kusok.vertex_offset).gl_tc,
			getVertex(uint(getIndex(triangle_first_index + 2)) + kusok.vertex_offset).gl_tc,
			rayQueryGetIntersectionBarycentricsEXT(rq, false));

		const float alpha = texture(textures[nonuniformEXT(kusok.tex_base_color)], uv).a;
		if (alpha >= alpha_mask_threshold) {
			// Texel is solid enough to block light: accept this hit
			rayQueryConfirmIntersectionEXT(rq);
		}
	}

	return rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionTriangleEXT;
}
#endif
bool shadowed(vec3 pos, vec3 dir, float dist, bool check_sky) {
#ifdef RAY_TRACE #ifdef RAY_TRACE
const uint flags = 0 const uint flags = 0
//| gl_RayFlagsCullFrontFacingTrianglesEXT //| gl_RayFlagsCullFrontFacingTrianglesEXT
@ -29,54 +78,32 @@ bool shadowed(vec3 pos, vec3 dir, float dist) {
| gl_RayFlagsSkipClosestHitShaderEXT | gl_RayFlagsSkipClosestHitShaderEXT
; ;
const uint hit_type = traceShadowRay(pos, dir, dist, flags); const uint hit_type = traceShadowRay(pos, dir, dist, flags);
return payload_shadow.hit_type == SHADOW_HIT; return check_sky ? payload_shadow.hit_type != SHADOW_SKY : payload_shadow.hit_type == SHADOW_HIT;
#elif defined(RAY_QUERY) #elif defined(RAY_QUERY)
rayQueryEXT rq; {
const uint flags = 0 const uint flags = 0
//| gl_RayFlagsCullFrontFacingTrianglesEXT | gl_RayFlagsCullFrontFacingTrianglesEXT
//| gl_RayFlagsOpaqueEXT | gl_RayFlagsOpaqueEXT
| gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsTerminateOnFirstHitEXT
//| gl_RayFlagsSkipClosestHitShaderEXT ;
; rayQueryEXT rq;
rayQueryInitializeEXT(rq, tlas, flags, GEOMETRY_BIT_OPAQUE, pos, 0., dir, dist - shadow_offset_fudge); rayQueryInitializeEXT(rq, tlas, flags, GEOMETRY_BIT_OPAQUE, pos, 0., dir, dist - shadow_offset_fudge);
// TODO alpha test while (rayQueryProceedEXT(rq)) {}
while (rayQueryProceedEXT(rq)) { }
return rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionTriangleEXT;
#else
#error RAY_TRACE or RAY_QUERY
#endif
}
// TODO join with just shadowed() if (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionTriangleEXT) {
bool shadowedSky(vec3 pos, vec3 dir, float dist) { if (!check_sky)
#ifdef RAY_TRACE return true;
const uint flags = 0
//| gl_RayFlagsCullFrontFacingTrianglesEXT
//| gl_RayFlagsOpaqueEXT
//| gl_RayFlagsTerminateOnFirstHitEXT
//| gl_RayFlagsSkipClosestHitShaderEXT
;
const uint hit_type = traceShadowRay(pos, dir, dist, flags);
return payload_shadow.hit_type != SHADOW_SKY;
#elif defined(RAY_QUERY)
rayQueryEXT rq;
const uint flags = 0
//| gl_RayFlagsCullFrontFacingTrianglesEXT
//| gl_RayFlagsOpaqueEXT
//| gl_RayFlagsTerminateOnFirstHitEXT
//| gl_RayFlagsSkipClosestHitShaderEXT
;
rayQueryInitializeEXT(rq, tlas, flags, GEOMETRY_BIT_OPAQUE, pos, 0., dir, dist - shadow_offset_fudge);
// TODO alpha test
while (rayQueryProceedEXT(rq)) { }
if (rayQueryGetIntersectionTypeEXT(rq, true) != gl_RayQueryCommittedIntersectionTriangleEXT) const int instance_kusochki_offset = rayQueryGetIntersectionInstanceCustomIndexEXT(rq, true);
return true; const int kusok_index = instance_kusochki_offset + rayQueryGetIntersectionGeometryIndexEXT(rq, true);
const uint tex_base_color = getKusok(kusok_index).tex_base_color;
if ((tex_base_color & KUSOK_MATERIAL_FLAG_SKYBOX) == 0)
return true;
}
}
return shadowTestAlphaMask(pos, dir, dist);
const int instance_kusochki_offset = rayQueryGetIntersectionInstanceCustomIndexEXT(rq, true);
const int kusok_index = instance_kusochki_offset + rayQueryGetIntersectionGeometryIndexEXT(rq, true);
const uint tex_base_color = getKusok(kusok_index).tex_base_color;
return (tex_base_color & KUSOK_MATERIAL_FLAG_SKYBOX) == 0;
#else #else
#error RAY_TRACE or RAY_QUERY #error RAY_TRACE or RAY_QUERY
#endif #endif

View File

@ -177,7 +177,7 @@ void sampleSinglePolygonLight(in vec3 P, in vec3 N, in vec3 view_dir, in SampleC
const float dist = - dot(vec4(P, 1.f), poly.plane) / dot(light_sample_dir.xyz, poly.plane.xyz); const float dist = - dot(vec4(P, 1.f), poly.plane) / dot(light_sample_dir.xyz, poly.plane.xyz);
if (shadowed(P, light_sample_dir.xyz, dist)) if (shadowed(P, light_sample_dir.xyz, dist, false))
return; return;
vec3 poly_diffuse = vec3(0.), poly_specular = vec3(0.); vec3 poly_diffuse = vec3(0.), poly_specular = vec3(0.);
@ -244,7 +244,7 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
const float dist = - plane_dist / dot(light_sample_dir.xyz, poly.plane.xyz); const float dist = - plane_dist / dot(light_sample_dir.xyz, poly.plane.xyz);
const vec3 emissive = poly.emissive; const vec3 emissive = poly.emissive;
if (!shadowed(P, light_sample_dir.xyz, dist)) { if (!shadowed(P, light_sample_dir.xyz, dist, false)) {
//const float estimate = total_contrib; //const float estimate = total_contrib;
const float estimate = light_sample_dir.w; const float estimate = light_sample_dir.w;
vec3 poly_diffuse = vec3(0.), poly_specular = vec3(0.); vec3 poly_diffuse = vec3(0.), poly_specular = vec3(0.);
@ -320,7 +320,7 @@ void sampleEmissiveSurfaces(vec3 P, vec3 N, vec3 throughput, vec3 view_dir, Mate
const vec3 emissive = poly.emissive; const vec3 emissive = poly.emissive;
//if (true) {//!shadowed(P, light_sample_dir.xyz, dist)) { //if (true) {//!shadowed(P, light_sample_dir.xyz, dist)) {
if (!shadowed(P, light_sample_dir.xyz, dist)) { if (!shadowed(P, light_sample_dir.xyz, dist, false)) {
//const float estimate = total_contrib; //const float estimate = total_contrib;
const float estimate = light_sample_dir.w; const float estimate = light_sample_dir.w;
vec3 poly_diffuse = vec3(0.), poly_specular = vec3(0.); vec3 poly_diffuse = vec3(0.), poly_specular = vec3(0.);

View File

@ -47,8 +47,9 @@ LIST_SPECIALIZATION_CONSTANTS(DECLARE_SPECIALIZATION_CONSTANT)
#endif // not GLSL #endif // not GLSL
#define GEOMETRY_BIT_OPAQUE 0x01 #define GEOMETRY_BIT_OPAQUE 0x01
#define GEOMETRY_BIT_ADDITIVE 0x02 #define GEOMETRY_BIT_ALPHA_TEST 0x02
#define GEOMETRY_BIT_REFRACTIVE 0x04 #define GEOMETRY_BIT_ADDITIVE 0x04
#define GEOMETRY_BIT_REFRACTIVE 0x08
#define SHADER_OFFSET_MISS_REGULAR 0 #define SHADER_OFFSET_MISS_REGULAR 0
#define SHADER_OFFSET_MISS_SHADOW 1 #define SHADER_OFFSET_MISS_SHADOW 1

View File

@ -23,6 +23,30 @@ RAY_PRIMARY_OUTPUTS(X)
layout(set = 0, binding = 1) uniform accelerationStructureEXT tlas; layout(set = 0, binding = 1) uniform accelerationStructureEXT tlas;
// Accumulates light added by additive geometry (instances selected by GEOMETRY_BIT_ADDITIVE) along a ray.
//   pos, dir - ray origin and direction
//   L        - distance along the ray up to which additive surfaces contribute at full strength
// The ray is traced slightly past L (by additive_soft_overshoot). Surfaces hit beyond L are faded out
// smoothly, reaching zero contribution at L + additive_soft_overshoot.
// Returns the summed color: texture rgb * kusok emissive * texture alpha, weighted by the fade factor.
vec3 traceAdditive(vec3 pos, vec3 dir, float L) {
	const float additive_soft_overshoot = 16.;
	vec3 ret = vec3(0., 0., 0.);

	rayQueryEXT rq;
	const uint flags = 0
		| gl_RayFlagsCullFrontFacingTrianglesEXT
		//| gl_RayFlagsSkipClosestHitShaderEXT
		| gl_RayFlagsNoOpaqueEXT // force all to be non-opaque
		;
	rayQueryInitializeEXT(rq, tlas, flags, GEOMETRY_BIT_ADDITIVE, pos, 0., dir, L + additive_soft_overshoot);

	// No candidate is ever confirmed: each candidate reported by the query just adds its color.
	while (rayQueryProceedEXT(rq)) {
		const MiniGeometry geom = readCandidateMiniGeometry(rq);

		// Fetch the kusok once and read both material fields from it
		const Kusok kusok = getKusok(geom.kusok_index);
		const vec4 texture_color = texture(textures[nonuniformEXT(kusok.tex_base_color)], geom.uv);
		const vec3 color = texture_color.rgb * kusok.emissive * texture_color.a; // * kusok_color.a;

		const float hit_t = rayQueryGetIntersectionTEXT(rq, false);
		const float overshoot = hit_t - L;

		// smoothstep() results are undefined when edge0 >= edge1, so the descending ramp is written as
		// one minus the ascending ramp: 1 for overshoot <= 0, 0 for overshoot >= additive_soft_overshoot.
		ret += color * (1. - smoothstep(0., additive_soft_overshoot, overshoot));
	}

	return ret;
}
void main() { void main() {
const ivec2 pix = ivec2(gl_GlobalInvocationID); const ivec2 pix = ivec2(gl_GlobalInvocationID);
const ivec2 res = ivec2(imageSize(out_position_t)); const ivec2 res = ivec2(imageSize(out_position_t));
@ -50,14 +74,36 @@ void main() {
//| gl_RayFlagsTerminateOnFirstHitEXT //| gl_RayFlagsTerminateOnFirstHitEXT
//| gl_RayFlagsSkipClosestHitShaderEXT //| gl_RayFlagsSkipClosestHitShaderEXT
; ;
const float L = 10000.; // TODO Why 10k? float L = 10000.; // TODO Why 10k?
rayQueryInitializeEXT(rq, tlas, flags, GEOMETRY_BIT_OPAQUE, origin, 0., direction, L); rayQueryInitializeEXT(rq, tlas, flags, GEOMETRY_BIT_OPAQUE | GEOMETRY_BIT_ALPHA_TEST, origin, 0., direction, L);
// TODO alpha test while (rayQueryProceedEXT(rq)) {
while (rayQueryProceedEXT(rq)) { } if (0 != (rayQueryGetRayFlagsEXT(rq) & gl_RayFlagsOpaqueEXT))
continue;
// alpha test
// TODO check other possible ways of doing alpha test. They might be more efficient
// (although in this particular primary ray case it's not that important):
// 1. Do a separate ray query for alpha masked geometry. Reason: here we might accidentally do the expensive
// texture sampling for geometry that's ultimately invisible (i.e. behind walls). Also, shader threads congruence.
// Separate pass could be more efficient as it'd be doing the same thing for every invocation.
// 2. Same as the above, but also with a completely independent TLAS. Why: no need to mask-check geometry for opaque-vs-alpha
const MiniGeometry geom = readCandidateMiniGeometry(rq);
const uint tex_base_color = getKusok(geom.kusok_index).tex_base_color;
const vec4 texture_color = texture(textures[nonuniformEXT(tex_base_color)], geom.uv);
const float alpha_mask_threshold = .1f;
if (texture_color.a >= alpha_mask_threshold) {
rayQueryConfirmIntersectionEXT(rq);
}
}
if (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionTriangleEXT) { if (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionTriangleEXT) {
primaryRayHit(rq, payload); primaryRayHit(rq, payload);
L = rayQueryGetIntersectionTEXT(rq, true);
} }
payload.emissive.rgb += traceAdditive(origin, direction, L);
imageStore(out_position_t, pix, payload.hit_t); imageStore(out_position_t, pix, payload.hit_t);
imageStore(out_base_color_a, pix, payload.base_color_a); imageStore(out_base_color_a, pix, payload.base_color_a);
imageStore(out_normals_gs, pix, payload.normals_gs); imageStore(out_normals_gs, pix, payload.normals_gs);

View File

@ -125,4 +125,37 @@ Geometry readHitGeometry(vec2 bary, float ray_cone_width) {
return geom; return geom;
} }
#ifdef RAY_QUERY
// Reduced geometry description of a ray query candidate intersection,
// as filled in by readCandidateMiniGeometry().
struct MiniGeometry {
// Texture coordinates interpolated from the triangle vertices at the intersection point
vec2 uv;
// Index of the intersected kusok, suitable for passing to getKusok()
uint kusok_index;
};
// Reads the current *candidate* (not yet committed) triangle intersection of `rq` and returns
// the kusok index it belongs to together with the texture coordinates interpolated at the hit point.
MiniGeometry readCandidateMiniGeometry(rayQueryEXT rq) {
	MiniGeometry ret;

	// Kusok index = per-instance custom index (offset into kusochki) + geometry index within the instance
	const uint candidate_instance_offset = uint(rayQueryGetIntersectionInstanceCustomIndexEXT(rq, false));
	const uint candidate_geometry = uint(rayQueryGetIntersectionGeometryIndexEXT(rq, false));
	ret.kusok_index = candidate_instance_offset + candidate_geometry;

	const Kusok kusok = getKusok(ret.kusok_index);

	// Three consecutive indices describe the candidate triangle
	const uint candidate_triangle = uint(rayQueryGetIntersectionPrimitiveIndexEXT(rq, false));
	const uint triangle_first_index = kusok.index_offset + candidate_triangle * 3;

	// Interpolate texture coordinates of the triangle corners using candidate barycentrics
	ret.uv = baryMix(
		getVertex(uint(getIndex(triangle_first_index + 0)) + kusok.vertex_offset).gl_tc,
		getVertex(uint(getIndex(triangle_first_index + 1)) + kusok.vertex_offset).gl_tc,
		getVertex(uint(getIndex(triangle_first_index + 2)) + kusok.vertex_offset).gl_tc,
		rayQueryGetIntersectionBarycentricsEXT(rq, false));

	return ret;
}
#endif // #ifdef RAY_QUERY
#endif // RT_GEOMETRY_GLSL_INCLUDED #endif // RT_GEOMETRY_GLSL_INCLUDED

View File

@ -39,7 +39,7 @@ typedef struct {
prev_states_storage_t g_prev = { 0 }; prev_states_storage_t g_prev = { 0 };
inline int clampIndex( int index, int array_length ) static inline int clampIndex( int index, int array_length )
{ {
if (index < 0) if (index < 0)
return 0; return 0;

View File

@ -179,7 +179,7 @@ void RT_VkAccelPrepareTlas(VkCommandBuffer cmdbuf) {
inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR;
break; break;
case MaterialMode_Opaque_AlphaTest: case MaterialMode_Opaque_AlphaTest:
inst[i].mask = GEOMETRY_BIT_OPAQUE; inst[i].mask = GEOMETRY_BIT_ALPHA_TEST;
inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_ALPHA_TEST, inst[i].instanceShaderBindingTableRecordOffset = SHADER_OFFSET_HIT_ALPHA_TEST,
inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR; inst[i].flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR;
break; break;