This repository has been archived by the owner on Apr 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 55
/
XeGTAO.h
263 lines (209 loc) · 15.9 KB
/
XeGTAO.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016-2021, Intel Corporation
//
// SPDX-License-Identifier: MIT
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// XeGTAO is based on GTAO/GTSO "Jimenez et al. / Practical Real-Time Strategies for Accurate Indirect Occlusion",
// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
//
// Implementation: Filip Strugar ([email protected]), Steve Mccalla <[email protected]> (\_/)
// Version: 1.30 (='.'=)
// Details: https://github.com/GameTechDev/XeGTAO (")_(")
//
// Version history:
// 1.00 (2021-08-09): Initial release
// 1.01 (2021-09-02): Fix for depth going to inf for 'far' depth buffer values that are out of fp16 range
// 1.02 (2021-09-03): More fast_acos use and made final horizon cos clamping optional (off by default): 3-4% perf boost
// 1.10 (2021-09-03): Added a couple of heuristics to combat over-darkening errors in certain scenarios
// 1.20 (2021-09-06): Optional normal from depth generation is now a standalone pass: no longer integrated into
// main XeGTAO pass to reduce complexity and allow reuse; also quality of generated normals improved
// 1.21 (2021-09-28): Replaced 'groupshared'-based denoiser with a slightly slower multi-pass one where a 2-pass new
// equals 1-pass old. However, 1-pass new is faster than the 1-pass old and enough when TAA enabled.
// 1.22 (2021-09-28): Added 'XeGTAO_' prefix to all local functions to avoid name clashes with various user codebases.
// 1.30 (2021-10-10): Added support for directional component (bent normals).
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef __XE_GTAO_TYPES_H__
#define __XE_GTAO_TYPES_H__
#ifdef __cplusplus
#include <cassert>
#include <cmath>
namespace XeGTAO
{
// cpp<->hlsl mapping: plain C++ stand-ins for the HLSL types used by the shared structs in this header
struct Matrix4x4
{
    float m[16];
};

struct Vector3
{
    float x;
    float y;
    float z;
};

struct Vector2
{
    float x;
    float y;
};

struct Vector2i
{
    int x;
    int y;
};

using uint = unsigned int;
#else // #ifdef __cplusplus
// cpp<->hlsl mapping
// When __cplusplus is not defined, the type names used by the shared structs below
// are mapped straight onto the built-in HLSL matrix/vector types.
#define Matrix4x4 float4x4
#define Vector3 float3
#define Vector2 float2
#define Vector2i int2
#endif
// Global consts that need to be visible from both shader and cpp side
// NOTE(review): NUMTHREADS_X/Y are presumably the compute thread group dimensions - confirm against the shader side.
#define XE_GTAO_DEPTH_MIP_LEVELS 5 // this one is hard-coded to 5 for now
#define XE_GTAO_NUMTHREADS_X 8 // these can be changed
#define XE_GTAO_NUMTHREADS_Y 8 // these can be changed
// Constants shared between the cpp and shader side (built only from the Vector2/Vector2i/float/int mapped types).
// On the cpp side every field is written by GTAOUpdateConstants(); the notes below describe what that function stores.
// Field order and types define the layout, so they must not be changed independently of the shader side.
struct GTAOConstants
{
Vector2i ViewportSize; // { viewportWidth, viewportHeight }
Vector2 ViewportPixelSize; // .xy == 1.0 / ViewportSize.xy
Vector2 DepthUnpackConsts; // { depthLinearizeMul, depthLinearizeAdd } - both read from the projection matrix
Vector2 CameraTanHalfFOV; // { tanHalfFOVX, tanHalfFOVY } - reciprocals of projection matrix elements [0] and [5]
Vector2 NDCToViewMul; // { CameraTanHalfFOV.x * 2.0, CameraTanHalfFOV.y * -2.0 }
Vector2 NDCToViewAdd; // { CameraTanHalfFOV.x * -1.0, CameraTanHalfFOV.y * 1.0 }
Vector2 NDCToViewMul_x_PixelSize; // NDCToViewMul * ViewportPixelSize (component-wise)
float EffectRadius; // world (viewspace) maximum size of the shadow
float EffectFalloffRange; // copy of GTAOSettings::FalloffRange
float RadiusMultiplier; // copy of GTAOSettings::RadiusMultiplier
float Padding0; // unused; written as 0
float FinalValuePower; // copy of GTAOSettings::FinalValuePower
float DenoiseBlurBeta; // 1.2 when denoising is enabled; 1e4 when DenoisePasses == 0 (high value disables denoise)
float SampleDistributionPower; // copy of GTAOSettings::SampleDistributionPower
float ThinOccluderCompensation; // copy of GTAOSettings::ThinOccluderCompensation
float DepthMIPSamplingOffset; // copy of GTAOSettings::DepthMIPSamplingOffset
int NoiseIndex; // frameIndex % 64 if using TAA or 0 otherwise
};
// This is used only for the development (ray traced ground truth).
// Shared between the cpp and shader side; the constructor exists on the cpp side only.
struct ReferenceRTAOConstants
{
    float TotalRaysLength;      // similar to Radius from GTAO
    float Albedo;               // the assumption on the average material albedo
    int MaxBounces;             // how many rays to recurse before stopping
    int AccumulatedFrames;      // how many frames have we accumulated so far (after resetting/clearing). If 0 - this is the first.
    int AccumulateFrameMax;     // how many frames are we aiming to accumulate; stop when we hit!
    int Padding0;               // unused
    int Padding1;               // unused
    int Padding2;               // unused
#ifdef __cplusplus
    // Initializes every member, including the padding, so that no indeterminate bytes are ever
    // copied along with the struct.
    ReferenceRTAOConstants( )
        : TotalRaysLength( 1.0f )
        , Albedo( 0.0f )
        , MaxBounces( 1 )
        , AccumulatedFrames( 0 )
        , AccumulateFrameMax( 0 )
        , Padding0( 0 )
        , Padding1( 0 )
        , Padding2( 0 )
    {
    }
#endif
};
// XE_GTAO_USE_DEFAULT_CONSTANTS defaults to 1 unless it has already been defined before this point.
#ifndef XE_GTAO_USE_DEFAULT_CONSTANTS
#define XE_GTAO_USE_DEFAULT_CONSTANTS 1
#endif
// some constants reduce performance if provided as dynamic values; if these constants are not required to be dynamic and they match default values,
// set XE_GTAO_USE_DEFAULT_CONSTANTS and the code will compile into a more efficient shader
// (on the cpp side the XE_GTAO_DEFAULT_* values are also the initial values of the matching GTAOSettings fields)
#define XE_GTAO_DEFAULT_RADIUS_MULTIPLIER (1.457f ) // allows us to use different value as compared to ground truth radius to counter inherent screen space biases
#define XE_GTAO_DEFAULT_FALLOFF_RANGE (0.615f ) // distant samples contribute less
#define XE_GTAO_DEFAULT_SAMPLE_DISTRIBUTION_POWER (2.0f ) // small crevices more important than big surfaces
#define XE_GTAO_DEFAULT_THIN_OCCLUDER_COMPENSATION (0.0f ) // the new 'thickness heuristic' approach
#define XE_GTAO_DEFAULT_FINAL_VALUE_POWER (2.2f ) // modifies the final ambient occlusion value using power function - this allows some of the above heuristics to do different things
#define XE_GTAO_DEFAULT_DEPTH_MIP_SAMPLING_OFFSET (3.30f ) // main trade-off between performance (memory bandwidth) and quality (temporal stability is the first affected, thin objects next)
#define XE_GTAO_OCCLUSION_TERM_SCALE (1.5f) // for packing in UNORM (because raw, pre-denoised occlusion term can overshoot 1 but will later average out to 1)
// From https://www.shadertoy.com/view/3tB3z3 - except we're using R2 here
#define XE_HILBERT_LEVEL 6U
#define XE_HILBERT_WIDTH ( (1U << XE_HILBERT_LEVEL) )
#define XE_HILBERT_AREA ( XE_HILBERT_WIDTH * XE_HILBERT_WIDTH )
// Maps a 2D position to its index along a Hilbert curve covering an XE_HILBERT_WIDTH x XE_HILBERT_WIDTH tile.
// Walks the bits of the position from the most significant (XE_HILBERT_WIDTH/2) down to 1; at every level the
// quadrant picks the index contribution and the position is swapped (and mirrored for the x-only quadrant).
// Written with constructs that are valid both as C++ and as HLSL.
inline uint HilbertIndex( uint posX, uint posY )
{
    uint result = 0U;
    uint level = XE_HILBERT_WIDTH / 2U;
    while( level > 0U )
    {
        // which half of the current cell the position falls into, per axis (0 or 1)
        const uint quadX = ( ( posX & level ) != 0U ) ? 1U : 0U;
        const uint quadY = ( ( posY & level ) != 0U ) ? 1U : 0U;

        result += level * level * ( ( 3U * quadX ) ^ quadY );

        if( quadY == 0U )
        {
            // swap the axes; for the ( 1, 0 ) quadrant also mirror both coordinates first
            uint nextX = posY;
            uint nextY = posX;
            if( quadX == 1U )
            {
                nextX = ( XE_HILBERT_WIDTH - 1U ) - posY;
                nextY = ( XE_HILBERT_WIDTH - 1U ) - posX;
            }
            posX = nextX;
            posY = nextY;
        }

        level /= 2U;
    }
    return result;
}
#ifdef __cplusplus
struct GTAOSettings
{
int QualityLevel = 2; // 0: low; 1: medium; 2: high; 3: ultra
int DenoisePasses = 1; // 0: disabled; 1: sharp; 2: medium; 3: soft
float Radius = 0.5f; // [0.0, ~ ] World (view) space size of the occlusion sphere.
// auto-tune-d settings
float RadiusMultiplier = XE_GTAO_DEFAULT_RADIUS_MULTIPLIER;
float FalloffRange = XE_GTAO_DEFAULT_FALLOFF_RANGE;
float SampleDistributionPower = XE_GTAO_DEFAULT_SAMPLE_DISTRIBUTION_POWER;
float ThinOccluderCompensation = XE_GTAO_DEFAULT_THIN_OCCLUDER_COMPENSATION;
float FinalValuePower = XE_GTAO_DEFAULT_FINAL_VALUE_POWER;
float DepthMIPSamplingOffset = XE_GTAO_DEFAULT_DEPTH_MIP_SAMPLING_OFFSET;
};
// Returns 'v' limited to the closed range [min, max]: 'min' if v < min, 'max' if v > max, otherwise 'v'.
// The bounds must be ordered (max >= min); this is checked with assert(), which is why this header
// includes <cassert>.
template<class T> inline T clamp( T const & v, T const & min, T const & max )
{
    assert( max >= min );
    if( v < min )
        return min;
    if( v > max )
        return max;
    return v;
}
// Fills 'consts' from the viewport size, the user 'settings' and the projection matrix.
//   projMatrix   - the 16 floats of the projection matrix; 'rowMajor' selects which element is read as the
//                  depth linearization multiplier (the other elements used here sit at the same index either way)
//   frameCounter - stored as NoiseIndex = frameCounter % 64 when settings.DenoisePasses > 0, otherwise NoiseIndex is 0
//                  (intended use: if using TAA pass the frame index, otherwise use 0)
inline void GTAOUpdateConstants( XeGTAO::GTAOConstants& consts, int viewportWidth, int viewportHeight, const XeGTAO::GTAOSettings & settings, const float projMatrix[16], bool rowMajor, unsigned int frameCounter )
{
    // viewport size and its reciprocal
    consts.ViewportSize.x = viewportWidth;
    consts.ViewportSize.y = viewportHeight;
    consts.ViewportPixelSize.x = 1.0f / static_cast<float>( viewportWidth );
    consts.ViewportPixelSize.y = 1.0f / static_cast<float>( viewportHeight );

    // depth linearization terms
    const int mulElement = rowMajor ? ( 3 * 4 + 2 ) : ( 3 + 2 * 4 );
    const float unpackMul = -projMatrix[mulElement];    // = ( clipFar * clipNear ) / ( clipFar - clipNear )
    float unpackAdd = projMatrix[2 * 4 + 2];            // = clipFar / ( clipFar - clipNear ); same element for both layouts
    // correct the handedness issue: make the 'add' term share the sign of the 'mul' term
    if( unpackMul * unpackAdd < 0.0f )
    {
        unpackAdd = -unpackAdd;
    }
    consts.DepthUnpackConsts.x = unpackMul;
    consts.DepthUnpackConsts.y = unpackAdd;

    // field of view; the diagonal elements are at the same index for both layouts
    consts.CameraTanHalfFOV.x = 1.0f / projMatrix[0];   // = tanHalfFOVY * aspect
    consts.CameraTanHalfFOV.y = 1.0f / projMatrix[5];   // = tanf( yFOV * 0.5f )

    // NDC -> view space reconstruction terms
    consts.NDCToViewMul.x = consts.CameraTanHalfFOV.x * 2.0f;
    consts.NDCToViewMul.y = consts.CameraTanHalfFOV.y * -2.0f;
    consts.NDCToViewAdd.x = consts.CameraTanHalfFOV.x * -1.0f;
    consts.NDCToViewAdd.y = consts.CameraTanHalfFOV.y * 1.0f;
    consts.NDCToViewMul_x_PixelSize.x = consts.NDCToViewMul.x * consts.ViewportPixelSize.x;
    consts.NDCToViewMul_x_PixelSize.y = consts.NDCToViewMul.y * consts.ViewportPixelSize.y;

    // values taken straight from the settings
    consts.EffectRadius             = settings.Radius;
    consts.EffectFalloffRange       = settings.FalloffRange;
    consts.RadiusMultiplier         = settings.RadiusMultiplier;
    consts.SampleDistributionPower  = settings.SampleDistributionPower;
    consts.ThinOccluderCompensation = settings.ThinOccluderCompensation;
    consts.FinalValuePower          = settings.FinalValuePower;
    consts.DepthMIPSamplingOffset   = settings.DepthMIPSamplingOffset;

    // values that depend on whether denoising is enabled
    if( settings.DenoisePasses == 0 )
    {
        // high value disables denoise - more elegant & correct way would be do set all edges to 0
        consts.DenoiseBlurBeta = 1e4f;
    }
    else
    {
        consts.DenoiseBlurBeta = 1.2f;
    }
    consts.NoiseIndex = ( settings.DenoisePasses > 0 ) ? static_cast<int>( frameCounter % 64 ) : 0;

    consts.Padding0 = 0.0f;
}
#ifdef IMGUI_API
// Draws the Dear ImGui widgets for editing 'settings' and clamps every value to its supported range.
// Returns true if any of the widgets (combo boxes included) reported a change during this call.
// Only compiled when IMGUI_API is defined, i.e. when the ImGui header was included before this one.
inline bool GTAOImGuiSettings( XeGTAO::GTAOSettings & settings )
{
    // single upper bound for the radius, used both before the widget is shown and after it was edited
    const float radiusMax = 10000.0f;

    bool hadChanges = false;

    ImGui::PushItemWidth( 120.0f );

    ImGui::Text( "Performance/quality settings:" );

    hadChanges |= ImGui::Combo( "Quality Level", &settings.QualityLevel, "Low\0Medium\0High\0Ultra\00");
    if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Higher quality settings use more samples per pixel but are slower" );
    settings.QualityLevel = clamp( settings.QualityLevel , 0, 3 );

    hadChanges |= ImGui::Combo( "Denoising level", &settings.DenoisePasses, "Disabled\0Sharp\0Medium\0Soft\00");
    if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "The amount of edge-aware spatial denoise applied" );
    settings.DenoisePasses = clamp( settings.DenoisePasses , 0, 3 );

    ImGui::Text( "Visual settings:" );

    settings.Radius = clamp( settings.Radius, 0.0f, radiusMax );
    hadChanges |= ImGui::InputFloat( "Effect radius", &settings.Radius , 0.05f, 0.0f, "%.2f" );
    if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "World (viewspace) effect radius\nExpected range: depends on the scene & requirements, anything from 0.01 to 1000+" );
    settings.Radius = clamp( settings.Radius , 0.0f, radiusMax );

    if( ImGui::CollapsingHeader( "Auto-tuned settings (heuristics)" ) )
    {
        hadChanges |= ImGui::InputFloat( "Radius multiplier", &settings.RadiusMultiplier , 0.05f, 0.0f, "%.2f" );
        if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Multiplies the 'Effect Radius' - used by the auto-tune to best match raytraced ground truth\nExpected range: [0.3, 3.0], defaults to %.3f", XE_GTAO_DEFAULT_RADIUS_MULTIPLIER );
        settings.RadiusMultiplier = clamp( settings.RadiusMultiplier , 0.3f, 3.0f );

        hadChanges |= ImGui::InputFloat( "Falloff range", &settings.FalloffRange , 0.05f, 0.0f, "%.2f" );
        if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Gently reduce sample impact as it gets out of 'Effect radius' bounds\nExpected range: [0.0, 1.0], defaults to %.3f", XE_GTAO_DEFAULT_FALLOFF_RANGE );
        settings.FalloffRange = clamp( settings.FalloffRange , 0.0f, 1.0f );

        hadChanges |= ImGui::InputFloat( "Sample distribution power", &settings.SampleDistributionPower , 0.05f, 0.0f, "%.2f" );
        if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Make samples on a slice equally distributed (1.0) or focus more towards the center (>1.0)\nExpected range: [1.0, 3.0], defaults to %.3f", XE_GTAO_DEFAULT_SAMPLE_DISTRIBUTION_POWER );
        settings.SampleDistributionPower = clamp( settings.SampleDistributionPower , 1.0f, 3.0f );

        hadChanges |= ImGui::InputFloat( "Thin occluder compensation", &settings.ThinOccluderCompensation, 0.05f, 0.0f, "%.2f" );
        if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Slightly reduce impact of samples further back to counter the bias from depth-based (incomplete) input scene geometry data\nExpected range: [0.0, 0.7], defaults to %.3f", XE_GTAO_DEFAULT_THIN_OCCLUDER_COMPENSATION );
        settings.ThinOccluderCompensation = clamp( settings.ThinOccluderCompensation , 0.0f, 0.7f );

        hadChanges |= ImGui::InputFloat( "Final power", &settings.FinalValuePower, 0.05f, 0.0f, "%.2f" );
        if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Applies power function to the final value: occlusion = pow( occlusion, finalPower )\nExpected range: [0.5, 5.0], defaults to %.3f", XE_GTAO_DEFAULT_FINAL_VALUE_POWER );
        settings.FinalValuePower = clamp( settings.FinalValuePower , 0.5f, 5.0f );

        hadChanges |= ImGui::InputFloat( "Depth MIP sampling offset", &settings.DepthMIPSamplingOffset, 0.05f, 0.0f, "%.2f" );
        if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Mainly performance (texture memory bandwidth) setting but as a side-effect reduces overshadowing by thin objects and increases temporal instability\nExpected range: [2.0, 6.0], defaults to %.3f", XE_GTAO_DEFAULT_DEPTH_MIP_SAMPLING_OFFSET );
        // the hard limits are intentionally wider than the 'expected range' shown in the tooltip
        settings.DepthMIPSamplingOffset = clamp( settings.DepthMIPSamplingOffset , 0.0f, 30.0f );
    }

    ImGui::PopItemWidth( );

    return hadChanges;
}
#endif // IMGUI_API
} // close the namespace
#endif // #ifdef __cplusplus
#endif // __XE_GTAO_TYPES_H__