From 3126952cd60f22f2657c18c74aa771e317470f54 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 6 May 2023 23:12:52 +1000 Subject: [PATCH] GS/HW: Add ROV based rendering for DX11/DX12/Vulkan --- bin/resources/shaders/dx11/tfx.fx | 194 ++++++++-- bin/resources/shaders/vulkan/tfx.glsl | 155 +++++++- pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 1 + pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 26 ++ pcsx2/Config.h | 1 + pcsx2/GS/Renderers/Common/GSDevice.h | 19 + pcsx2/GS/Renderers/Common/GSTexture.cpp | 3 +- pcsx2/GS/Renderers/Common/GSTexture.h | 1 + pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp | 8 +- pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 169 ++++++++- pcsx2/GS/Renderers/DX11/GSDevice11.h | 3 + pcsx2/GS/Renderers/DX11/GSTexture11.cpp | 5 +- pcsx2/GS/Renderers/DX12/D3D12ShaderCache.cpp | 3 + pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 275 +++++++++++--- pcsx2/GS/Renderers/DX12/GSDevice12.h | 54 +-- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 358 +++++++++++++++-- pcsx2/GS/Renderers/HW/GSRendererHW.h | 13 + pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 20 + pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 1 + pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp | 8 + pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 380 +++++++++++++++---- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 31 +- pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp | 5 +- pcsx2/GS/Renderers/Vulkan/VKBuilders.cpp | 50 ++- pcsx2/GS/Renderers/Vulkan/VKBuilders.h | 10 +- pcsx2/GS/Renderers/Vulkan/VKEntryPoints.inl | 10 + pcsx2/Pcsx2Config.cpp | 3 + 27 files changed, 1521 insertions(+), 285 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index f8aff2b2af13c..8049ce5871589 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -77,11 +77,16 @@ #define PS_NO_COLOR 0 #define PS_NO_COLOR1 0 #define PS_DATE 0 +#define PS_ROV 0 +#define PS_ZTST 0 +#define PS_ZWE 0 +#define PS_AFAIL 0 #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) #define 
SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1)) #define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED) +#define ROV_DEPTH (PS_ZTST != 0 || PS_ZWE != 0) struct VS_INPUT { @@ -136,17 +141,26 @@ struct PS_OUTPUT #endif #endif #endif -#if PS_ZCLAMP +#if PS_ZCLAMP && !ROV_DEPTH float depth : SV_Depth; #endif }; Texture2D Texture : register(t0); Texture2D Palette : register(t1); +#if !PS_ROV Texture2D RtTexture : register(t2); +#endif Texture2D PrimMinTexture : register(t3); SamplerState TextureSampler : register(s0); +#if PS_ROV +RasterizerOrderedTexture2D rovRT : register(u0); +#if ROV_DEPTH +RasterizerOrderedTexture2D rovDS : register(u1); +#endif +#endif + #ifdef DX12 cbuffer cb1 : register(b1) #else @@ -171,10 +185,73 @@ cbuffer cb1 float RcpScaleFactor; }; + +#if PS_ROV + +static float4 rovRTValue; +static uint4 rovFbMask; +static bool rovPixelTestResult; +static bool rovDepthWrite; + +#define DISCARD return output + +float4 sample_from_rt(int2 uv) +{ + return rovRTValue; +} + +void rov_read_rt(int2 uv) +{ + rovRTValue = rovRT[uv]; + rovPixelTestResult = true; + rovFbMask = FbMask; + rovDepthWrite = PS_ZWE != 0; +} + +void rov_write_rt(int2 uv, float4 color) +{ + if (rovPixelTestResult) + rovRT[uv] = color; +} + +#if ROV_DEPTH + +bool rov_depth_test(int2 uv, float z) +{ + bool zpass = true; + +#if PS_ZTST > 1 + float ds_z = rovDS[uv]; + #if PS_ZTST == 2 + zpass = (z >= ds_z); + #elif PS_ZTST == 3 + zpass = (z > ds_z); + #endif +#endif + + if (zpass && rovDepthWrite) + rovDS[uv] = z; + + return zpass; +} + +#endif + +#else + +#define DISCARD discard + +float4 sample_from_rt(int2 uv) +{ + return RtTexture.Load(int3(uv, 0)); +} + +#endif + float4 sample_c(float2 uv, float uv_w) { #if PS_TEX_IS_FB == 1 - return RtTexture.Load(int3(int2(uv * WH.zw), 0)); + return sample_from_rt(int2(uv * WH.zw)); #elif PS_REGION_RECT == 1 return Texture.Load(int3(int2(uv), 0)); #else @@ -378,7 +455,7 @@ float4x4 sample_4p(uint4 u) 
int fetch_raw_depth(int2 xy) { #if PS_TEX_IS_FB == 1 - float4 col = RtTexture.Load(int3(xy, 0)); + float4 col = sample_from_rt(xy); #else float4 col = Texture.Load(int3(xy, 0)); #endif @@ -388,7 +465,7 @@ int fetch_raw_depth(int2 xy) float4 fetch_raw_color(int2 xy) { #if PS_TEX_IS_FB == 1 - return RtTexture.Load(int3(xy, 0)); + return sample_from_rt(xy); #else return Texture.Load(int3(xy, 0)); #endif @@ -792,12 +869,16 @@ float4 ps_color(PS_INPUT input) return C; } -void ps_fbmask(inout float4 C, float2 pos_xy) +void ps_fbmask(inout float4 C, int2 pos_xy) { if (PS_FBMASK) { - float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f); - C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask)); + float4 RT = trunc(sample_from_rt(pos_xy) * 255.0f + 0.1f); + #if PS_ROV + C = (float4)(((uint4)C & ~rovFbMask) | ((uint4)RT & rovFbMask)); + #else + C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask)); + #endif } } @@ -850,7 +931,7 @@ void ps_color_clamp_wrap(inout float3 C) } } -void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) +void ps_blend(inout float4 Color, inout float4 As_rgba, int2 pos_xy) { float As = As_rgba.a; @@ -864,7 +945,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) return; } - float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f; + float4 RT = SW_BLEND_NEEDS_RT ? sample_from_rt(pos_xy) : (float4)0.0f; if (PS_SHUFFLE && SW_BLEND_NEEDS_RT) { @@ -980,23 +1061,57 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) } } +#if PS_ROV && !ROV_DEPTH +[earlydepthstencil] +#endif + PS_OUTPUT ps_main(PS_INPUT input) { + int2 input_xy = int2(input.p.xy); + float input_z = PS_ZCLAMP ? 
min(input.p.z, MaxDepthPS) : input.p.z; + +#if PS_ROV + rov_read_rt(input_xy); +#endif + + PS_OUTPUT output; +#if PS_ZCLAMP && !ROV_DEPTH + output.depth = input_z; +#endif + float4 C = ps_color(input); bool atst_pass = atst(C); -#if PS_AFAIL == 0 // KEEP or ATST off if (!atst_pass) - discard; + { +#if PS_ROV + if (PS_AFAIL == 0) + { + DISCARD; + } + else if (PS_AFAIL == 1) // FB_ONLY + { + rovDepthWrite = false; + } + else if (PS_AFAIL == 2) // ZB_ONLY + { + rovFbMask = 0xFF; + } + else if (PS_AFAIL == 3) // RGB_ONLY + { + rovFbMask.a = 0xFF; + rovDepthWrite = false; + } +#elif PS_AFAIL == 0 + DISCARD; #endif - - PS_OUTPUT output; + } if (PS_SCANMSK & 2) { // fail depth test on prohibited lines - if ((int(input.p.y) & 1) == (PS_SCANMSK & 1)) - discard; + if ((input_xy.y & 1) == (PS_SCANMSK & 1)) + DISCARD; } // Must be done before alpha correction @@ -1010,7 +1125,7 @@ PS_OUTPUT ps_main(PS_INPUT input) float4 alpha_blend = (float4)0.0f; if (SW_AD_TO_HW) { - float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 128.0f + 0.1f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f); + float4 RT = PS_RTA_CORRECTION ? trunc(sample_from_rt(input_xy) * 128.0f + 0.1f) : trunc(sample_from_rt(input_xy) * 255.0f + 0.1f); alpha_blend = (float4)(RT.a / 128.0f); } else @@ -1030,12 +1145,40 @@ PS_OUTPUT ps_main(PS_INPUT input) if (C.a < A_one) C.a += A_one; } +#if PS_DATE < 10 && (((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)) + +#if PS_WRITE_RG == 1 + // Pseudo 16 bits access. 
+ float rt_a = sample_from_rt(input_xy).g; +#else + float rt_a = sample_from_rt(input_xy).a; +#endif + +#if (PS_DATE & 3) == 1 + // DATM == 0: Pixel with alpha equal to 1 will fail + bool bad = (127.5f / 255.0f) < rt_a; +#elif (PS_DATE & 3) == 2 + // DATM == 1: Pixel with alpha equal to 0 will fail + bool bad = rt_a < (127.5f / 255.0f); +#endif + + if (bad) { +#if PS_ROV || PS_DATE >= 5 + DISCARD; +#else + return; +#endif + } + +#endif + + #if PS_DATE == 3 // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update // the bad alpha value so we must keep it. - int stencil_ceil = int(PrimMinTexture.Load(int3(input.p.xy, 0))); + int stencil_ceil = int(PrimMinTexture.Load(int3(input_xy, 0))); if (int(input.primid) > stencil_ceil) - discard; + DISCARD; #endif // Get first primitive that will write a failling alpha value @@ -1053,7 +1196,7 @@ PS_OUTPUT ps_main(PS_INPUT input) #else // Not primid DATE setup - ps_blend(C, alpha_blend, input.p.xy); + ps_blend(C, alpha_blend, input_xy); if (PS_SHUFFLE) { @@ -1142,7 +1285,7 @@ PS_OUTPUT ps_main(PS_INPUT input) // Color clamp/wrap needs to be done after sw blending and dithering ps_color_clamp_wrap(C.rgb); - ps_fbmask(C, input.p.xy); + ps_fbmask(C, input_xy); #if PS_AFAIL == 3 // RGB_ONLY // Use alpha blend factor to determine whether to update A. @@ -1157,12 +1300,15 @@ PS_OUTPUT ps_main(PS_INPUT input) #endif #endif // !PS_NO_COLOR -#endif // PS_DATE != 1/2 - -#if PS_ZCLAMP - output.depth = min(input.p.z, MaxDepthPS); +#if PS_ROV && ROV_DEPTH + if (rov_depth_test(input_xy, input_z)) + rov_write_rt(input_xy, C / float4(255.0f, 255.0f, 255.0f, PS_RTA_CORRECTION ? 128.0f : 255.0f)); +#elif PS_ROV + rov_write_rt(input_xy, C / float4(255.0f, 255.0f, 255.0f, PS_RTA_CORRECTION ? 
128.0f : 255.0f)); #endif +#endif // PS_DATE != 1/2 + return output; } diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 72353f40d8cb4..cf96bc9be5ba7 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -295,11 +295,16 @@ void main() #define PS_ZCLAMP 0 #define PS_FEEDBACK_LOOP 0 #define PS_TEX_IS_FB 0 +#define PS_ROV 0 +#define PS_ZTST 0 +#define PS_ZWE 0 +#define PS_AFAIL 0 #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) #define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1)) #define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED) +#define ROV_DEPTH (PS_ZTST != 0 || PS_ZWE != 0) #define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5)) @@ -348,18 +353,74 @@ layout(set = 1, binding = 0) uniform sampler2D Texture; layout(set = 1, binding = 1) uniform texture2D Palette; #endif -#if PS_FEEDBACK_LOOP_IS_NEEDED - #if defined(DISABLE_TEXTURE_BARRIER) - layout(set = 1, binding = 2) uniform texture2D RtSampler; - vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); } - #else - layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler; - vec4 sample_from_rt() { return subpassLoad(RtSampler); } +#if PS_ROV + layout(set = 1, binding = 2, rgba8) uniform restrict coherent image2D rovRT; + layout(set = 1, binding = 3, r32f) uniform restrict coherent image2D rovDS; + + vec4 rovRTValue; + uvec4 rovFbMask; + bool rovDiscarded; + bool rovDepthWrite; + + // We could use the demote-to-helper extension here I guess... 
+ #define DISCARD rovDiscarded = true + + vec4 sample_from_rt() + { + return rovRTValue; + } + + void rov_read_rt() + { + rovRTValue = imageLoad(rovRT, ivec2(gl_FragCoord.xy)); + rovDiscarded = gl_HelperInvocation; + rovFbMask = FbMask; + rovDepthWrite = PS_ZWE != 0; + } + + void rov_write_rt(vec4 color) + { + imageStore(rovRT, ivec2(gl_FragCoord.xy), color); + } + + #if ROV_DEPTH + + bool rov_depth_test(float z) + { + bool zpass = true; + + #if PS_ZTST > 1 + float ds_z = imageLoad(rovDS, ivec2(gl_FragCoord.xy)).r; + #if PS_ZTST == 2 + zpass = (z >= ds_z); + #elif PS_ZTST == 3 + zpass = (z > ds_z); + #endif + #endif + + if (zpass && rovDepthWrite) + imageStore(rovDS, ivec2(gl_FragCoord.xy), vec4(z)); + + return zpass; + } + + #endif +#else + #if PS_FEEDBACK_LOOP_IS_NEEDED + #if defined(DISABLE_TEXTURE_BARRIER) + layout(set = 1, binding = 2) uniform texture2D RtSampler; + vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); } + #else + layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler; + vec4 sample_from_rt() { return subpassLoad(RtSampler); } + #endif + #endif + + #if PS_DATE > 0 + layout(set = 1, binding = 3) uniform texture2D PrimMinTexture; #endif -#endif -#if PS_DATE > 0 -layout(set = 1, binding = 3) uniform texture2D PrimMinTexture; + #define DISCARD discard #endif #if NEEDS_TEX @@ -978,8 +1039,16 @@ vec4 ps_color() void ps_fbmask(inout vec4 C) { #if PS_FBMASK - vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f); - C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); + #if PS_RTA_CORRECTION + vec4 RT = trunc(sample_from_rt() * vec4(255.0f, 255.0f, 255.0f, 128.0f) + 0.1f); + #else + vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f); + #endif + #if PS_ROV + C = vec4((uvec4(C) & ~rovFbMask) | (uvec4(RT) & rovFbMask)); + #else + C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); + #endif #endif } @@ -1207,13 +1276,27 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba) #endif } +#if PS_ROV 
+layout(pixel_interlock_ordered) in; + #if !ROV_DEPTH + layout(early_fragment_tests) in; + #endif +#endif + void main() { +#if PS_ROV + // TODO: Delay the interlocked portion if we don't need the RT early. + beginInvocationInterlockARB(); + rov_read_rt(); +#endif + #if PS_SCANMSK & 2 // fail depth test on prohibited lines if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1)) - discard; + DISCARD; #endif + #if PS_DATE >= 5 #if PS_WRITE_RG == 1 @@ -1240,7 +1323,7 @@ void main() #endif if (bad) { - discard; + DISCARD; } #endif // PS_DATE >= 5 @@ -1251,17 +1334,37 @@ void main() // the bad alpha value so we must keep it. if (gl_PrimitiveID > stencil_ceil) { - discard; + DISCARD; } #endif vec4 C = ps_color(); bool atst_pass = atst(C); -#if PS_AFAIL == 0 // KEEP or ATST off if (!atst_pass) - discard; -#endif + { + #if PS_ROV + if (PS_AFAIL == 0) + { + DISCARD; + } + else if (PS_AFAIL == 1) // FB_ONLY + { + rovDepthWrite = false; + } + else if (PS_AFAIL == 2) // ZB_ONLY + { + rovFbMask = uvec4(0xFFu); + } + else if (PS_AFAIL == 3) // RGB_ONLY + { + rovFbMask.a = 0xFFu; + rovDepthWrite = false; + } + #elif PS_AFAIL == 0 // KEEP or ATST off + DISCARD; + #endif + } // Must be done before alpha correction @@ -1400,9 +1503,21 @@ void main() #if !PS_NO_COLOR1 o_col1 = alpha_blend; #endif - #endif + #endif // !PS_NO_COLOR - #if PS_ZCLAMP + #if PS_ROV + if (!rovDiscarded) + { + #if ROV_DEPTH + float frag_depth = (PS_ZCLAMP != 0) ? min(gl_FragCoord.z, MaxDepthPS) : gl_FragCoord.z; + if (rov_depth_test(frag_depth)) + rov_write_rt(C / vec4(255.0f, 255.0f, 255.0f, (PS_RTA_CORRECTION != 0) ? 128.0f : 255.0f)); + #else + rov_write_rt(C / vec4(255.0f, 255.0f, 255.0f, (PS_RTA_CORRECTION != 0) ? 
128.0f : 255.0f)); + #endif + } + endInvocationInterlockARB(); + #elif PS_ZCLAMP gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS); #endif diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index 271bc1e279134..99c81a965174f 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -226,6 +226,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.threadedPresentation, "EmuCore/GS", "DisableThreadedPresentation", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.exclusiveFullscreenControl, "EmuCore/GS", "ExclusiveFullscreenControl", -1, -1); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideTextureBarriers, "EmuCore/GS", "OverrideTextureBarriers", -1, -1); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.overrideRasterizerOrderViews, "EmuCore/GS", "OverrideRasterizerOrderViews", -1, -1); SettingWidgetBinder::BindWidgetToIntSetting( sif, m_ui.gsDumpCompression, "EmuCore/GS", "GSDumpCompression", static_cast(GSDumpCompressionMethod::Zstandard)); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.disableFramebufferFetch, "EmuCore/GS", "DisableFramebufferFetch", false); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index 802f395990af6..ab864a7767aae 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -2179,6 +2179,32 @@ + + + + Override Rasterizer Order Views: + + + + + + + + Automatic (Default) + + + + + Force Disabled + + + + + Force Enabled + + + + diff --git a/pcsx2/Config.h b/pcsx2/Config.h index c8e1a4a33346a..fe37711287233 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -715,6 +715,7 @@ struct Pcsx2Config GSBilinearDirtyMode UserHacks_BilinearHack = GSBilinearDirtyMode::Automatic; TriFiltering TriFilter = 
TriFiltering::Automatic; s8 OverrideTextureBarriers = -1; + s8 OverrideRasterizerOrderViews = -1; u8 CAS_Sharpness = 50; u8 ShadeBoost_Brightness = 50; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index c990f1683c432..5ee4c40b8ad26 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -370,6 +370,10 @@ struct alignas(16) GSHWDrawConfig // Scan mask u32 scanmsk : 2; + + u32 rov : 1; + u32 ztst : 2; + u32 zwe : 1; }; struct @@ -391,6 +395,17 @@ struct alignas(16) GSHWDrawConfig return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt; } + __fi bool UseROV() const + { + return IsFeedbackLoop() || afail != 0; + } + + __fi bool NeedsROVDepth() const + { + // DATE, ATST, SCANMSK all discard. + return ((date > 0 && date != 3) || atst || scanmsk); + } + /// Disables color output from the pixel shader, this is done when all channels are masked. __fi void DisableColorOutput() { @@ -684,6 +699,7 @@ struct alignas(16) GSHWDrawConfig DestinationAlphaMode destination_alpha; SetDATM datm : 2; bool line_expand : 1; + bool rov_depth : 1; struct AlphaPass { @@ -750,10 +766,13 @@ class GSDevice : public GSAlignedClass<32> bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE. bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening. bool test_and_sample_depth: 1; ///< Supports concurrently binding the depth-stencil buffer for sampling and depth testing. + bool raster_order_view : 1; ///< Supports raster ordered views, can avoid barriers. 
FeatureSupport() { memset(this, 0, sizeof(*this)); } + + __fi bool CanSampleFromFB() const { return texture_barrier || raster_order_view; } }; struct MultiStretchRect diff --git a/pcsx2/GS/Renderers/Common/GSTexture.cpp b/pcsx2/GS/Renderers/Common/GSTexture.cpp index bc8a9e61c0522..1ccc31e101e82 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.cpp +++ b/pcsx2/GS/Renderers/Common/GSTexture.cpp @@ -94,7 +94,8 @@ u32 GSTexture::GetCompressedBytesPerBlock(Format format) 1, // Invalid 4, // Color/RGBA8 8, // HDRColor/RGBA16 - 4, // DepthStencil + 4, // DepthStencil/D32FS8 + 4, // ColorDepth/R32F 1, // UNorm8/R8 2, // UInt16/R16UI 4, // UInt32/R32UI diff --git a/pcsx2/GS/Renderers/Common/GSTexture.h b/pcsx2/GS/Renderers/Common/GSTexture.h index 1b58e168492b0..bbca7c05d6454 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.h +++ b/pcsx2/GS/Renderers/Common/GSTexture.h @@ -32,6 +32,7 @@ class GSTexture Color, ///< Standard (RGBA8) color texture HDRColor, ///< Color texture with more bits for colclip emulation (RGBA16Unorm) DepthStencil, ///< Depth stencil texture + ColorDepth, ///< Float depth texture used for software depth (ROV) UNorm8, ///< A8UNorm texture for paletted textures and the OSD font UInt16, ///< UInt16 texture for reading back 16-bit depth UInt32, ///< UInt32 texture for reading back 24 and 32-bit depth diff --git a/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp b/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp index eedc792a132f2..ffffa249ff756 100644 --- a/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp +++ b/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp @@ -55,12 +55,13 @@ bool D3D11ShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const bool D3D11ShaderCache::Open(D3D_FEATURE_LEVEL feature_level, bool debug) { - m_feature_level = feature_level; + // DX11 doesn't support SM5.1, but can still use ROVs... 
+ m_feature_level = std::min(feature_level, D3D_FEATURE_LEVEL_11_0); m_debug = debug; if (!GSConfig.DisableShaderCache) { - const std::string base_filename = GetCacheBaseFileName(feature_level, debug); + const std::string base_filename = GetCacheBaseFileName(m_feature_level, debug); const std::string index_filename = base_filename + ".idx"; const std::string blob_filename = base_filename + ".bin"; @@ -206,6 +207,9 @@ std::string D3D11ShaderCache::GetCacheBaseFileName(D3D_FEATURE_LEVEL feature_lev case D3D_FEATURE_LEVEL_11_0: base_filename += "sm50"; break; + case D3D_FEATURE_LEVEL_11_1: + base_filename += "sm51"; + break; default: base_filename += "unk"; break; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index fa55574974ff3..3f8ba4a3ada06 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -46,6 +46,7 @@ GSDevice11::GSDevice11() m_features.primitive_id = true; m_features.texture_barrier = false; + m_features.framebuffer_fetch = false; m_features.provoking_vertex_last = false; m_features.point_expand = false; m_features.line_expand = false; @@ -56,6 +57,7 @@ GSDevice11::GSDevice11() m_features.stencil_buffer = true; m_features.cas_sharpening = true; m_features.test_and_sample_depth = false; + m_features.raster_order_view = false; } GSDevice11::~GSDevice11() = default; @@ -95,10 +97,11 @@ bool GSDevice11::Create() wil::com_ptr_nothrow dxgi_adapter = D3D::GetAdapterByName(m_dxgi_factory.get(), GSConfig.Adapter); - static constexpr std::array requested_feature_levels = {{ + static constexpr const std::array requested_feature_levels = { + D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_0, - }}; + }; wil::com_ptr_nothrow temp_dev; wil::com_ptr_nothrow temp_ctx; @@ -552,6 +555,12 @@ void GSDevice11::Destroy() if (m_state.dsv) m_state.dsv->Release(); + for (ID3D11UnorderedAccessView* uav : m_state.uav) + { + if (uav) + uav->Release(); + } + 
m_shader_cache.Close(); #ifdef REPORT_LEAKED_OBJECTS @@ -595,6 +604,16 @@ void GSDevice11::SetFeatures(IDXGIAdapter1* adapter) m_features.vs_expand = false; } } + + D3D11_FEATURE_DATA_D3D11_OPTIONS2 opts2 = {}; + if (m_feature_level >= D3D_FEATURE_LEVEL_11_1 && + SUCCEEDED(m_dev->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &opts2, sizeof(opts2))) && + opts2.ROVsSupported && GSConfig.OverrideRasterizerOrderViews != 0) + { + DevCon.WriteLn("(GSDevice11) Enabling ROV"); + m_features.texture_barrier = true; + m_features.raster_order_view = true; + } } int GSDevice11::GetMaxTextureSize() const @@ -1184,7 +1203,9 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height switch (type) { case GSTexture::Type::RenderTarget: - desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; + desc.BindFlags = m_features.raster_order_view ? + (D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS) : + (D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); break; case GSTexture::Type::DepthStencil: desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; @@ -1737,6 +1758,10 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb); sm.AddMacro("PS_NO_COLOR", sel.no_color); sm.AddMacro("PS_NO_COLOR1", sel.no_color1); + sm.AddMacro("PS_ROV", sel.rov); + sm.AddMacro("PS_ZTST", sel.ztst); + sm.AddMacro("PS_ZWE", sel.zwe); + sm.AddMacro("PS_DATE", sel.date); wil::com_ptr_nothrow ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main"); i = m_ps.try_emplace(sel, std::move(ps)).first; @@ -2387,6 +2412,16 @@ void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, u8 bf) void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) { + // squash warnings + for (u32 i = 0; i < MAX_TEXTURES; i++) + { + if ((rt && m_state.ps_sr_views[i] == *(GSTexture11*)rt) || (ds && 
m_state.ps_sr_views[i] == *(GSTexture11*)ds)) + { + m_state.ps_sr_views[i] = nullptr; + m_ctx->PSSetShaderResources(i, 1, &m_state.ps_sr_views[i]); + } + } + ID3D11RenderTargetView* rtv = nullptr; ID3D11DepthStencilView* dsv = nullptr; @@ -2401,7 +2436,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector dsv = *static_cast(ds); } - const bool changed = (m_state.rt_view != rtv || m_state.dsv != dsv); + const bool changed = (m_state.rt_view != rtv || m_state.dsv != dsv || m_state.uav[0] || m_state.uav[1]); g_perfmon.Put(GSPerfMon::RenderPasses, static_cast(changed)); if (m_state.rt_view != rtv) @@ -2421,7 +2456,27 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector m_state.dsv = dsv; } if (changed) - m_ctx->OMSetRenderTargets(1, &rtv, dsv); + { + if (m_state.uav[0] || m_state.uav[1]) + { + m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(1, &rtv, dsv, + 0, 0, nullptr, nullptr); + if (m_state.uav[0]) + { + m_state.uav[0]->Release(); + m_state.uav[0] = nullptr; + } + if (m_state.uav[1]) + { + m_state.uav[1]->Release(); + m_state.uav[1] = nullptr; + } + } + else + { + m_ctx->OMSetRenderTargets(1, &rtv, dsv); + } + } if (rt || ds) { @@ -2454,6 +2509,83 @@ void GSDevice11::SetScissor(const GSVector4i& scissor) } } +void GSDevice11::OMSetUAVs(GSTexture* rt, GSTexture* ds, bool rov_depth, const GSVector4i* scissor) +{ + // squash warnings + for (u32 i = 0; i < MAX_TEXTURES; i++) + { + if ((rt && m_state.ps_sr_views[i] == *(GSTexture11*)rt) || (ds && m_state.ps_sr_views[i] == *(GSTexture11*)ds)) + { + m_state.ps_sr_views[i] = nullptr; + m_ctx->PSSetShaderResources(i, 1, &m_state.ps_sr_views[i]); + } + } + + ID3D11DepthStencilView* dsv = nullptr; + if (ds && !rov_depth) + dsv = *(GSTexture11*)ds; + + std::array uavs; + uavs[0] = rt ? static_cast(*(GSTexture11*)rt) : nullptr; + uavs[1] = rov_depth ? 
static_cast(*(GSTexture11*)ds) : nullptr; + + if (m_state.rt_view || m_state.dsv != dsv || m_state.uav != uavs) + { + if (m_state.rt_view) + m_state.rt_view->Release(); + m_state.rt_view = nullptr; + + if (m_state.dsv != dsv) + { + if (m_state.dsv) + m_state.dsv->Release(); + if (dsv) + dsv->AddRef(); + m_state.dsv = dsv; + } + + for (u32 i = 0; i < uavs.size(); i++) + { + if (m_state.uav[i] != uavs[i]) + { + if (m_state.uav[i]) + m_state.uav[i]->Release(); + m_state.uav[i] = uavs[i]; + if (uavs[i]) + uavs[i]->AddRef(); + } + } + + m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, nullptr, dsv, + 0, 2, uavs.data(), nullptr); + } + + const GSVector2i size = rt ? rt->GetSize() : ds->GetSize(); + if (m_state.viewport != size) + { + m_state.viewport = size; + + D3D11_VIEWPORT vp; + memset(&vp, 0, sizeof(vp)); + + vp.TopLeftX = 0.0f; + vp.TopLeftY = 0.0f; + vp.Width = (float)size.x; + vp.Height = (float)size.y; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + + m_ctx->RSSetViewports(1, &vp); + } + + if (!m_state.scissor.eq(*scissor)) + { + m_state.scissor = *scissor; + + m_ctx->RSSetScissorRects(1, reinterpret_cast(scissor)); + } +} + void GSDevice11::ShaderMacro::AddMacro(const char* n, int d) { AddMacro(n, std::to_string(d)); @@ -2475,18 +2607,8 @@ D3D_SHADER_MACRO* GSDevice11::ShaderMacro::GetPtr() return (D3D_SHADER_MACRO*)mout.data(); } -/// Checks that we weren't sent things we declared we don't support -/// Clears things we don't support that can be quietly disabled -static void preprocessSel(GSDevice11::PSSelector& sel) -{ - pxAssert(sel.write_rg == 0); // Not supported, shouldn't be sent -} - void GSDevice11::RenderHW(GSHWDrawConfig& config) { - pxAssert(!config.require_full_barrier); // We always specify no support so it shouldn't request this - preprocessSel(config.ps); - GSVector2i rtsize = (config.rt ? 
config.rt : config.ds)->GetSize(); GSTexture* primid_tex = nullptr; @@ -2499,7 +2621,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) StretchRect(config.rt, GSVector4(config.drawarea) / GSVector4(rtsize).xyxy(), primid_tex, GSVector4(config.drawarea), m_date.primid_init_ps[static_cast(config.datm)].get(), nullptr, false); } - else if (config.destination_alpha != GSHWDrawConfig::DestinationAlphaMode::Off) + else if (config.destination_alpha != GSHWDrawConfig::DestinationAlphaMode::Off && config.destination_alpha != GSHWDrawConfig::DestinationAlphaMode::Full) { const GSVector4 src = GSVector4(config.drawarea) / GSVector4(config.ds->GetSize()).xyxy(); const GSVector4 dst = src * 2.0f - 1.0f; @@ -2585,7 +2707,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) } GSTexture* rt_copy = nullptr; - if (config.require_one_barrier || (config.tex && config.tex == config.rt)) // Used as "bind rt" flag when texture barrier is unsupported + if (!config.ps.rov && (config.require_one_barrier || (config.tex && config.tex == config.rt))) // Used as "bind rt" flag when texture barrier is unsupported { // Bind the RT.This way special effect can use it. // Do not always bind the rt when it's not needed, @@ -2620,8 +2742,16 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) PSSetShaderResource(3, primid_tex); } - SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant); - OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor); + if (config.ps.rov) + SetupOM(config.depth, OMBlendSelector(), 0); + else + SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant); + + if (config.ps.rov) + OMSetUAVs(config.rt, config.ds, config.rov_depth, &config.scissor); + else + OMSetRenderTargets(hdr_rt ? 
hdr_rt : config.rt, config.ds, &config.scissor); + DrawIndexedPrimitive(); if (config.blend_second_pass.enable) @@ -2635,7 +2765,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) if (config.alpha_second_pass.enable) { - preprocessSel(config.alpha_second_pass.ps); if (config.cb_ps.FogColor_AREF.a != config.alpha_second_pass.ps_aref) { config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.h b/pcsx2/GS/Renderers/DX11/GSDevice11.h index 8a7777a3c2066..6c5f11e213b64 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.h +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.h @@ -157,6 +157,8 @@ class GSDevice11 final : public GSDevice u8 bf; ID3D11RenderTargetView* rt_view; ID3D11DepthStencilView* dsv; + + std::array uav; } m_state; std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; @@ -332,6 +334,7 @@ class GSDevice11 final : public GSDevice void OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref); void OMSetBlendState(ID3D11BlendState* bs, u8 bf); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = nullptr); + void OMSetUAVs(GSTexture* rt, GSTexture* ds, bool rov_depth, const GSVector4i* scissor); void SetViewport(const GSVector2i& viewport); void SetScissor(const GSVector4i& scissor); diff --git a/pcsx2/GS/Renderers/DX11/GSTexture11.cpp b/pcsx2/GS/Renderers/DX11/GSTexture11.cpp index 5dfece588322e..ae2836854c45c 100644 --- a/pcsx2/GS/Renderers/DX11/GSTexture11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTexture11.cpp @@ -31,6 +31,7 @@ DXGI_FORMAT GSTexture11::GetDXGIFormat(Format format) case GSTexture::Format::Color: return DXGI_FORMAT_R8G8B8A8_UNORM; case GSTexture::Format::HDRColor: return DXGI_FORMAT_R16G16B16A16_UNORM; case GSTexture::Format::DepthStencil: return DXGI_FORMAT_R32G8X24_TYPELESS; + case GSTexture::Format::ColorDepth: return DXGI_FORMAT_R32_FLOAT; case GSTexture::Format::UNorm8: return DXGI_FORMAT_A8_UNORM; case GSTexture::Format::UInt16: return 
DXGI_FORMAT_R16_UINT; case GSTexture::Format::UInt32: return DXGI_FORMAT_R32_UINT; @@ -165,8 +166,10 @@ GSTexture11::operator ID3D11DepthStencilView*() GSTexture11::operator ID3D11UnorderedAccessView*() { - if (!m_uav) + if (!m_uav && m_desc.BindFlags & D3D11_BIND_UNORDERED_ACCESS) + { GSDevice11::GetInstance()->GetD3DDevice()->CreateUnorderedAccessView(m_texture.get(), nullptr, m_uav.put()); + } return m_uav.get(); } diff --git a/pcsx2/GS/Renderers/DX12/D3D12ShaderCache.cpp b/pcsx2/GS/Renderers/DX12/D3D12ShaderCache.cpp index d310f9904ffdd..d05b710ed63d6 100644 --- a/pcsx2/GS/Renderers/DX12/D3D12ShaderCache.cpp +++ b/pcsx2/GS/Renderers/DX12/D3D12ShaderCache.cpp @@ -264,6 +264,9 @@ std::string D3D12ShaderCache::GetCacheBaseFileName(const std::string_view& type, case D3D_FEATURE_LEVEL_11_0: base_filename += "sm50"; break; + case D3D_FEATURE_LEVEL_12_0: + base_filename += "sm51"; + break; default: base_filename += "unk"; break; diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 46d1cf7d23acd..19a34ce06a6aa 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -9,6 +9,8 @@ #include "GS/Renderers/DX12/GSDevice12.h" #include "GS/Renderers/DX12/D3D12Builders.h" #include "GS/Renderers/DX12/D3D12ShaderCache.h" +#include "GS/GSState.h" + #include "Host.h" #include "ShaderCacheVersion.h" @@ -178,7 +180,16 @@ bool GSDevice12::CreateDevice() } // Create the actual device. 
- hr = D3D12CreateDevice(m_adapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); + static constexpr const D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; + for (const D3D_FEATURE_LEVEL fl : feature_levels) + { + hr = D3D12CreateDevice(m_adapter.get(), fl, IID_PPV_ARGS(&m_device)); + if (SUCCEEDED(hr)) + { + m_feature_level = fl; + break; + } + } if (FAILED(hr)) { Console.Error("Failed to create D3D12 device: %08X", hr); @@ -1210,6 +1221,7 @@ bool GSDevice12::CheckFeatures() m_features.cas_sharpening = true; m_features.test_and_sample_depth = false; m_features.vs_expand = !GSConfig.DisableVertexShaderExpand; + m_features.raster_order_view = false; m_features.dxt_textures = SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) && SupportsTextureFormat(DXGI_FORMAT_BC2_UNORM) && @@ -1221,6 +1233,16 @@ bool GSDevice12::CheckFeatures() DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported)); m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE); + + D3D12_FEATURE_DATA_D3D12_OPTIONS opts = {}; + if (m_feature_level >= D3D_FEATURE_LEVEL_11_1 && + SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &opts, sizeof(opts))) && + opts.ROVsSupported && GSConfig.OverrideRasterizerOrderViews != 0) + { + DevCon.WriteLn("(GSDevice12) Enabling ROV"); + m_features.raster_order_view = true; + } + return true; } @@ -1255,6 +1277,7 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f DXGI_FORMAT_UNKNOWN}, // HDRColor {DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_D32_FLOAT_S8X24_UINT}, // DepthStencil + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN}, // ColorDepth {DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_UNKNOWN}, // UNorm8 {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, 
DXGI_FORMAT_UNKNOWN}, // UInt16 {DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_UNKNOWN}, // UInt32 @@ -1284,7 +1307,14 @@ GSTexture* GSDevice12::CreateSurface(GSTexture::Type type, int width, int height DXGI_FORMAT dxgi_format, srv_format, rtv_format, dsv_format; LookupNativeFormat(format, &dxgi_format, &srv_format, &rtv_format, &dsv_format); - const DXGI_FORMAT uav_format = (type == GSTexture::Type::RWTexture) ? dxgi_format : DXGI_FORMAT_UNKNOWN; + const DXGI_FORMAT uav_format = + (type == GSTexture::Type::RWTexture) ? + dxgi_format : + (m_features.raster_order_view && + (type == GSTexture::Type::RenderTarget && + (format == GSTexture::Format::Color || format == GSTexture::Format::ColorDepth)) ? + srv_format : + DXGI_FORMAT_UNKNOWN); std::unique_ptr tex(GSTexture12::Create(type, format, clamped_width, clamped_height, levels, dxgi_format, srv_format, rtv_format, dsv_format, uav_format)); @@ -1397,8 +1427,13 @@ void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), int(dRect.right - dRect.left), int(dRect.bottom - dRect.top)); - DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, - dTex ? m_convert[static_cast(shader)].get() : m_present[static_cast(shader)].get(), linear, + // Hack for ROV + ID3D12PipelineState* pipeline = + (dTex ? ((dTex->GetFormat() == GSTexture::Format::ColorDepth) ? + m_rov_depth_begin_pipeline.get() : m_convert[static_cast(shader)].get()) : + m_present[static_cast(shader)].get()); + DoStretchRect( + static_cast(sTex), sRect, static_cast(dTex), dRect, pipeline, linear, ShaderConvertWriteMask(shader) == 0xf); } @@ -1580,7 +1615,9 @@ void GSDevice12::DoMultiStretchRects( pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); int rta_bit = (shader == ShaderConvert::RTA_CORRECTION) ? 
16 : 0; - SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba | rta_bit].get() : + SetPipeline((dTex->GetFormat() == GSTexture::Format::ColorDepth) ? + m_rov_depth_begin_pipeline.get() : + (rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba | rta_bit].get() : m_convert[static_cast(shader)].get()); if (ApplyUtilityState()) @@ -2127,12 +2164,12 @@ void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector { GSTexture12* vkRt = static_cast(rt); GSTexture12* vkDs = static_cast(ds); - pxAssert(vkRt || vkDs); if (m_current_render_target != vkRt || m_current_depth_target != vkDs) { // framebuffer change EndRenderPass(); + m_dirty_flags |= DIRTY_FLAG_RENDER_TARGET; } else if (InRenderPass()) { @@ -2165,11 +2202,14 @@ void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector vkDs->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE); } - // This is used to set/initialize the framebuffer for tfx rendering. - const GSVector2i size = vkRt ? vkRt->GetSize() : vkDs->GetSize(); - const D3D12_VIEWPORT vp{0.0f, 0.0f, static_cast(size.x), static_cast(size.y), 0.0f, 1.0f}; + if (vkRt || vkDs) + { + // This is used to set/initialize the framebuffer for tfx rendering. + const GSVector2i size = vkRt ? 
vkRt->GetSize() : vkDs->GetSize(); + const D3D12_VIEWPORT vp{ 0.0f, 0.0f, static_cast(size.x), static_cast(size.y), 0.0f, 1.0f }; + SetViewport(vp); + } - SetViewport(vp); SetScissor(scissor); } @@ -2287,7 +2327,7 @@ bool GSDevice12::CreateNullTexture() { m_null_texture = GSTexture12::Create(GSTexture::Type::Texture, GSTexture::Format::Color, 1, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN); + DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM); if (!m_null_texture) return false; @@ -2363,6 +2403,10 @@ bool GSDevice12::CreateRootSignatures() rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL); + + if (m_features.raster_order_view) + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); + if (!(m_tfx_root_signature = rsb.Create())) return false; D3D12::SetObjectName(m_tfx_root_signature.get(), "TFX root signature"); @@ -2496,6 +2540,15 @@ bool GSDevice12::CompileConvertPipelines() D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_format("Color copy pipeline (r={}, g={}, b={}, a={})", j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u)); } + + // ROV depth begin + gpb.SetRenderTarget(0, DXGI_FORMAT_R32_FLOAT); + gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); + gpb.SetNoBlendingState(); + m_rov_depth_begin_pipeline = gpb.Create(m_device.get(), m_shader_cache, false); + if (!m_rov_depth_begin_pipeline) + return false; + D3D12::SetObjectName(m_rov_depth_begin_pipeline.get(), "Convert depth to ROV"); } else if (i == ShaderConvert::HDR_INIT || i == ShaderConvert::HDR_RESOLVE) { @@ -2740,6 +2793,7 @@ void GSDevice12::DestroyResources() 
m_hdr_setup_pipelines = {}; m_hdr_finish_pipelines = {}; m_date_image_setup_pipelines = {}; + m_rov_depth_begin_pipeline.reset(); m_fxaa_pipeline.reset(); m_shadeboost_pipeline.reset(); m_imgui_pipeline.reset(); @@ -2875,6 +2929,9 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb); sm.AddMacro("PS_NO_COLOR", sel.no_color); sm.AddMacro("PS_NO_COLOR1", sel.no_color1); + sm.AddMacro("PS_ROV", sel.rov); + sm.AddMacro("PS_ZTST", sel.ztst); + sm.AddMacro("PS_ZWE", sel.zwe); ComPtr ps(m_shader_cache.GetPixelShader(m_tfx_source, sm.GetPtr(), "ps_main")); it = m_tfx_pixel_shaders.emplace(sel, std::move(ps)).first; @@ -3022,8 +3079,12 @@ void GSDevice12::InitializeState() { for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++) m_tfx_textures[i] = m_null_texture->GetSRVDescriptor(); + m_tfx_sampler_sel = GSHWDrawConfig::SamplerSelector::Point().key; + for (u32 i = 0; i < NUM_TFX_UAVS; i++) + m_tfx_uavs[i] = m_null_texture->GetUAVDescriptor(); + InvalidateCachedState(); } @@ -3202,6 +3263,35 @@ void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel) m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS; } +void GSDevice12::PSSetUAV(u32 index, GSTexture* uav) +{ + D3D12DescriptorHandle handle; + if (uav) + { + GSTexture12* dtex = static_cast(uav); + if (dtex->GetResourceState() != D3D12_RESOURCE_STATE_UNORDERED_ACCESS && InRenderPass()) + { + GL_INS("Ending render pass due to resource transition"); + EndRenderPass(); + } + + dtex->CommitClear(); + dtex->TransitionToState(D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + dtex->SetUseFenceCounter(GetCurrentFenceValue()); + handle = dtex->GetUAVDescriptor(); + } + else + { + handle = m_null_texture->GetUAVDescriptor(); + } + + if (m_tfx_uavs[index] == handle) + return; + + m_tfx_uavs[index] = handle; + m_dirty_flags |= DIRTY_FLAG_TFX_UAVS; +} + void GSDevice12::SetUtilityRootSignature() { if (m_current_root_signature == RootSignature::Utility) @@ -3271,6 +3361,14 @@ void 
GSDevice12::UnbindTexture(GSTexture12* tex) m_dirty_flags |= DIRTY_FLAG_TFX_TEXTURES; } } + for (u32 i = 0; i < NUM_TFX_UAVS; i++) + { + if (m_tfx_uavs[i] && m_tfx_uavs[i] == tex->GetUAVDescriptor()) + { + m_tfx_uavs[i] = m_null_texture->GetUAVDescriptor(); + m_dirty_flags |= DIRTY_FLAG_TFX_UAVS; + } + } if (m_current_render_target == tex) { EndRenderPass(); @@ -3376,11 +3474,8 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b if (m_in_render_pass) EndRenderPass(); - // we're setting the RT here. - m_dirty_flags &= ~DIRTY_FLAG_RENDER_TARGET; - m_in_render_pass = true; - D3D12_RENDER_PASS_RENDER_TARGET_DESC rt = {}; + if (m_current_render_target) { rt.cpuDescriptor = m_current_render_target->GetWriteDescriptor(); @@ -3416,8 +3511,47 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b } } - GetCommandList()->BeginRenderPass(m_current_render_target ? 1 : 0, - m_current_render_target ? &rt : nullptr, m_current_depth_target ? &ds : nullptr, D3D12_RENDER_PASS_FLAG_NONE); + if (m_features.raster_order_view) + { + // Don't use render passes when using ROV. + // But we need to commit any clears/discards. 
+ if (color_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD) + { + GetCommandList()->DiscardResource(m_current_render_target->GetResource(), nullptr); + m_current_render_target->SetState(GSTexture::State::Dirty); + } + else if (color_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) + { + GetCommandList()->ClearRenderTargetView(m_current_render_target->GetWriteDescriptor(), + rt.BeginningAccess.Clear.ClearValue.Color, 0, nullptr); + m_current_render_target->SetState(GSTexture::State::Dirty); + } + if (depth_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD) + { + GetCommandList()->DiscardResource(m_current_depth_target->GetResource(), nullptr); + m_current_depth_target->SetState(GSTexture::State::Dirty); + } + else if (depth_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR || stencil_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) + { + const u32 flags = + ((depth_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) ? D3D12_CLEAR_FLAG_DEPTH : 0) | + ((stencil_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) ? D3D12_CLEAR_FLAG_STENCIL : 0); + GetCommandList()->ClearDepthStencilView(m_current_depth_target->GetWriteDescriptor(), + static_cast(flags), ds.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth, ds.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Stencil, + 0, nullptr); + m_current_depth_target->SetState(GSTexture::State::Dirty); + } + } + else + { + // we're setting the RT here. + m_dirty_flags &= ~DIRTY_FLAG_RENDER_TARGET; + m_in_render_pass = true; + + GetCommandList()->BeginRenderPass( + m_current_render_target ? 1 : 0, m_current_render_target ? &rt : nullptr, + m_current_depth_target ? 
&ds : nullptr, D3D12_RENDER_PASS_FLAG_NONE); + } } void GSDevice12::EndRenderPass() @@ -3496,15 +3630,9 @@ __ri void GSDevice12::ApplyBaseState(u32 flags, ID3D12GraphicsCommandList* cmdli if (flags & DIRTY_FLAG_RENDER_TARGET) { - if (m_current_render_target) - { - cmdlist->OMSetRenderTargets(1, &m_current_render_target->GetWriteDescriptor().cpu_handle, FALSE, - m_current_depth_target ? &m_current_depth_target->GetWriteDescriptor().cpu_handle : nullptr); - } - else if (m_current_depth_target) - { - cmdlist->OMSetRenderTargets(0, nullptr, FALSE, &m_current_depth_target->GetWriteDescriptor().cpu_handle); - } + cmdlist->OMSetRenderTargets(static_cast(m_current_render_target != nullptr), + m_current_render_target ? &m_current_render_target->GetWriteDescriptor().cpu_handle : nullptr, FALSE, + m_current_depth_target ? &m_current_depth_target->GetWriteDescriptor().cpu_handle : nullptr); } } @@ -3592,6 +3720,17 @@ bool GSDevice12::ApplyTFXState(bool already_execed) flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2; } + if (m_features.raster_order_view && flags & DIRTY_FLAG_TFX_UAVS) + { + if (!GetTextureGroupDescriptors(&m_tfx_uav_handle_gpu, m_tfx_uavs.data(), NUM_TFX_UAVS)) + { + ExecuteCommandListAndRestartRenderPass(false, "Ran out of TFX UAV descriptor descriptor groups"); + return ApplyTFXState(true); + } + + flags |= DIRTY_FLAG_UAV_DESCRIPTOR_TABLE; + } + ID3D12GraphicsCommandList* cmdlist = GetCommandList(); if (m_current_root_signature != RootSignature::TFX) @@ -3599,7 +3738,7 @@ bool GSDevice12::ApplyTFXState(bool already_execed) m_current_root_signature = RootSignature::TFX; flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | - DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_PIPELINE; + DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_UAV_DESCRIPTOR_TABLE | DIRTY_FLAG_PIPELINE; cmdlist->SetGraphicsRootSignature(m_tfx_root_signature.get()); } @@ 
-3618,6 +3757,8 @@ bool GSDevice12::ApplyTFXState(bool already_execed) cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_tfx_samplers_handle_gpu); if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2) cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES, m_tfx_rt_textures_handle_gpu); + if (m_features.raster_order_view && flags & DIRTY_FLAG_UAV_DESCRIPTOR_TABLE) + cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_UAVS, m_tfx_uav_handle_gpu); ApplyBaseState(flags, cmdlist); return true; @@ -3776,13 +3917,13 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) if (stencil_DATE) SetupDATE(config.rt, config.ds, config.datm, config.drawarea); + // figure out the pipeline + UpdateHWPipelineSelector(config); + // stream buffer in first, in case we need to exec SetVSConstantBuffer(config.cb_vs); SetPSConstantBuffer(config.cb_ps); - // figure out the pipeline - UpdateHWPipelineSelector(config); - // bind textures before checking the render pass, in case we need to transition them PipelineSelector& pipe = m_pipeline_selector; if (config.tex) @@ -3879,29 +4020,68 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) PSSetShaderResource(0, nullptr, false); } - // avoid restarting the render pass just to switch from rt+depth to rt and vice versa - if (m_in_render_pass && (m_current_render_target == draw_rt || m_current_depth_target == draw_ds)) + if (config.ps.rov) { - // avoid restarting the render pass just to switch from rt+depth to rt and vice versa - // keep the depth even if doing HDR draws, because the next draw will probably re-enable depth - if (!draw_rt && m_current_render_target && config.tex != m_current_render_target && - m_current_render_target->GetSize() == draw_ds->GetSize()) + EndRenderPass(); + + draw_rt->CommitClear(); + PSSetUAV(0, draw_rt); + draw_rt = nullptr; + + if (draw_ds) + { + draw_ds->CommitClear(); + if (config.rov_depth) + { + PSSetUAV(1, config.ds); + draw_ds = nullptr; + } + 
} + + OMSetRenderTargets(nullptr, draw_ds, config.scissor); + if (!draw_ds) { - draw_rt = m_current_render_target; - m_pipeline_selector.rt = true; + // Still need to set viewport. + const D3D12_VIEWPORT vp{ 0.0f, 0.0f, static_cast(config.rt->GetWidth()), static_cast(config.rt->GetHeight()), 0.0f, 1.0f }; + SetViewport(vp); + SetScissor(config.scissor); } } - else if (!draw_ds && m_current_depth_target && config.tex != m_current_depth_target && - m_current_depth_target->GetSize() == draw_rt->GetSize()) + else { - draw_ds = m_current_depth_target; - m_pipeline_selector.ds = true; - } + // avoid restarting the render pass just to switch from rt+depth to rt and vice versa + if (m_in_render_pass && (m_current_render_target == draw_rt || m_current_depth_target == draw_ds)) + { + // avoid restarting the render pass just to switch from rt+depth to rt and vice versa + // keep the depth even if doing HDR draws, because the next draw will probably re-enable depth + if (!draw_rt && m_current_render_target && config.tex != m_current_render_target && + m_current_render_target->GetSize() == draw_ds->GetSize()) + { + draw_rt = m_current_render_target; + m_pipeline_selector.rt = true; + } + } + else if (!draw_ds && m_current_depth_target && config.tex != m_current_depth_target && + m_current_depth_target->GetSize() == draw_rt->GetSize()) + { + draw_ds = m_current_depth_target; + m_pipeline_selector.ds = true; + } + // avoid restarting the render pass just to switch from rt+depth to rt and vice versa + if (m_in_render_pass && !hdr_rt && !draw_ds && m_current_depth_target && m_current_render_target == draw_rt && + config.tex != m_current_depth_target && m_current_depth_target->GetSize() == draw_rt->GetSize()) + { + draw_ds = m_current_depth_target; + m_pipeline_selector.ds = true; + m_pipeline_selector.dss.ztst = ZTST_ALWAYS; + m_pipeline_selector.dss.zwe = false; + } - OMSetRenderTargets(draw_rt, draw_ds, config.scissor); + OMSetRenderTargets(draw_rt, draw_ds, config.scissor); + } 
// Begin render pass if new target or out of the area. - if (!m_in_render_pass) + if (!config.ps.rov && !m_in_render_pass) { GSVector4 clear_color = draw_rt ? draw_rt->GetUNormClearColor() : GSVector4::zero(); if (pipe.ps.hdr) @@ -4017,8 +4197,9 @@ void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config) m_pipeline_selector.bs.constant = 0; // don't dupe states with different alpha values m_pipeline_selector.cms.key = config.colormask.key; m_pipeline_selector.topology = static_cast(config.topology); - m_pipeline_selector.rt = config.rt != nullptr; - m_pipeline_selector.ds = config.ds != nullptr; + m_pipeline_selector.rt = config.rt != nullptr && !config.ps.rov; + m_pipeline_selector.ds = config.ds != nullptr && !config.rov_depth; + m_pipeline_selector.rov = m_features.raster_order_view; } void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config) diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index 49abdf47b06f3..f517673c0e953 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -191,6 +191,7 @@ class GSDevice12 final : public GSDevice u32 topology : 2; u32 rt : 1; u32 ds : 1; + u32 rov : 1; }; u32 key; @@ -258,6 +259,7 @@ class GSDevice12 final : public GSDevice NUM_TFX_RT_TEXTURES = 2, NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES, NUM_TFX_SAMPLERS = 1, + NUM_TFX_UAVS = 2, NUM_UTILITY_TEXTURES = 1, NUM_UTILITY_SAMPLERS = 1, CONVERT_PUSH_CONSTANTS_SIZE = 96, @@ -273,6 +275,7 @@ class GSDevice12 final : public GSDevice TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 3, TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 4, TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES = 5, + TFX_ROOT_SIGNATURE_PARAM_PS_UAVS = 6, UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0, UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1, @@ -318,6 +321,7 @@ class GSDevice12 final : public GSDevice std::array, 2> m_hdr_setup_pipelines{}; // [depth] std::array, 2> m_hdr_finish_pipelines{}; // 
[depth] std::array, 4>, 2> m_date_image_setup_pipelines{}; // [depth][datm] + ComPtr m_rov_depth_begin_pipeline; ComPtr m_fxaa_pipeline; ComPtr m_shadeboost_pipeline; ComPtr m_imgui_pipeline; @@ -458,6 +462,7 @@ class GSDevice12 final : public GSDevice void PSSetShaderResource(int i, GSTexture* sr, bool check_state); void PSSetSampler(GSHWDrawConfig::SamplerSelector sel); + void PSSetUAV(u32 index, GSTexture* uav); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor); @@ -525,33 +530,36 @@ class GSDevice12 final : public GSDevice DIRTY_FLAG_TFX_TEXTURES = (1 << 2), DIRTY_FLAG_TFX_SAMPLERS = (1 << 3), DIRTY_FLAG_TFX_RT_TEXTURES = (1 << 4), - - DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 5), - DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 6), - DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 7), - DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 8), - DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 9), - DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 10), - - DIRTY_FLAG_VERTEX_BUFFER = (1 << 11), - DIRTY_FLAG_INDEX_BUFFER = (1 << 12), - DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 13), - DIRTY_FLAG_VIEWPORT = (1 << 14), - DIRTY_FLAG_SCISSOR = (1 << 15), - DIRTY_FLAG_RENDER_TARGET = (1 << 16), - DIRTY_FLAG_PIPELINE = (1 << 17), - DIRTY_FLAG_BLEND_CONSTANTS = (1 << 18), - DIRTY_FLAG_STENCIL_REF = (1 << 19), + DIRTY_FLAG_TFX_UAVS = (1 << 5), + + DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING = (1 << 6), + DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING = (1 << 7), + DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING = (1 << 8), + DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE = (1 << 9), + DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE = (1 << 10), + DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 = (1 << 11), + DIRTY_FLAG_UAV_DESCRIPTOR_TABLE = (1 << 12), + + DIRTY_FLAG_VERTEX_BUFFER = (1 << 13), + DIRTY_FLAG_INDEX_BUFFER = (1 << 14), + DIRTY_FLAG_PRIMITIVE_TOPOLOGY = (1 << 15), + DIRTY_FLAG_VIEWPORT = (1 << 16), + DIRTY_FLAG_SCISSOR = (1 << 17), + DIRTY_FLAG_RENDER_TARGET = (1 << 18), + DIRTY_FLAG_PIPELINE = (1 << 
19), + DIRTY_FLAG_BLEND_CONSTANTS = (1 << 20), + DIRTY_FLAG_STENCIL_REF = (1 << 21), DIRTY_BASE_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | - DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PRIMITIVE_TOPOLOGY | - DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE | - DIRTY_FLAG_BLEND_CONSTANTS | DIRTY_FLAG_STENCIL_REF, + DIRTY_FLAG_UAV_DESCRIPTOR_TABLE | DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | + DIRTY_FLAG_PRIMITIVE_TOPOLOGY | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | + DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_BLEND_CONSTANTS | + DIRTY_FLAG_STENCIL_REF, - DIRTY_TFX_STATE = - DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | DIRTY_FLAG_TFX_RT_TEXTURES, + DIRTY_TFX_STATE = DIRTY_BASE_STATE | DIRTY_FLAG_TFX_TEXTURES | DIRTY_FLAG_TFX_SAMPLERS | + DIRTY_FLAG_TFX_RT_TEXTURES | DIRTY_FLAG_TFX_UAVS, DIRTY_UTILITY_STATE = DIRTY_BASE_STATE, DIRTY_CONSTANT_BUFFER_STATE = DIRTY_FLAG_VS_CONSTANT_BUFFER | DIRTY_FLAG_PS_CONSTANT_BUFFER, }; @@ -587,11 +595,13 @@ class GSDevice12 final : public GSDevice std::array m_tfx_constant_buffers{}; std::array m_tfx_textures{}; + std::array m_tfx_uavs{}; D3D12DescriptorHandle m_tfx_sampler; u32 m_tfx_sampler_sel = 0; D3D12DescriptorHandle m_tfx_textures_handle_gpu; D3D12DescriptorHandle m_tfx_samplers_handle_gpu; D3D12DescriptorHandle m_tfx_rt_textures_handle_gpu; + D3D12DescriptorHandle m_tfx_uav_handle_gpu; D3D12DescriptorHandle m_utility_texture_cpu; D3D12DescriptorHandle m_utility_texture_gpu; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index d5ecc893a6775..af2c8d3e21954 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -52,6 +52,15 @@ 
void GSRendererHW::Destroy() void GSRendererHW::PurgeTextureCache(bool sources, bool targets, bool hash_cache) { g_texture_cache->RemoveAll(sources, targets, hash_cache); + + if (targets) + { + g_gs_device->Recycle(m_rov_depth_tex.release()); + m_rov_color_dst = nullptr; + m_rov_depth_dst = nullptr; + m_rov_depth_rect = GSVector4i::zero(); + m_rov_mismatch_count = 0; + } } void GSRendererHW::ReadbackTextureCache() @@ -94,6 +103,10 @@ void GSRendererHW::UpdateSettings(const Pcsx2Config::GSOptions& old_config) void GSRendererHW::VSync(u32 field, bool registers_written, bool idle_frame) { + EndROVDepth(nullptr, nullptr); + m_rov_color_dst = nullptr; + m_rov_mismatch_count = 0; + if (GSConfig.LoadTextureReplacements) GSTextureReplacements::ProcessAsyncLoadedTextures(); @@ -3493,7 +3506,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS bool enable_fbmask_emulation = false; const GSDevice::FeatureSupport features = g_gs_device->Features(); - if (features.texture_barrier) + if (features.CanSampleFromFB()) { enable_fbmask_emulation = GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum; } @@ -3534,7 +3547,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS // If date is enabled you need to test the green channel instead of the alpha channel. // Only enable this code in DATE mode to reduce the number of shaders. - m_conf.ps.write_rg = (process_rg & SHUFFLE_WRITE) && features.texture_barrier && m_cached_ctx.TEST.DATE; + m_conf.ps.write_rg = (process_rg & SHUFFLE_WRITE) && features.CanSampleFromFB() && m_cached_ctx.TEST.DATE; m_conf.ps.real16src = m_copy_16bit_to_target_shuffle; m_conf.ps.shuffle_same = m_same_group_texture_shuffle; // Please bang my head against the wall! @@ -3668,7 +3681,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS have been invalidated before subsequent Draws are executed. */ // No blending so hit unsafe path. 
- if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7) || !g_gs_device->Features().texture_barrier) + if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7) || !g_gs_device->Features().CanSampleFromFB()) { GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_cached_ctx.FRAME.FBMSK, (m_conf.ps.dst_fmt == GSLocalMemory::PSM_FMT_16) ? 16 : 32); @@ -4011,7 +4024,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000; bool blend_ad_alpha_masked = blend_ad && alpha_mask; const bool is_basic_blend = GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic; - if ((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked) + if ((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.CanSampleFromFB() && blend_ad_alpha_masked) { // Swap Ad with As for hw blend. m_conf.ps.a_masked = 1; @@ -4065,7 +4078,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT // HW blend can be done in multiple passes when there's no overlap. // Blend second pass is only useful when texture barriers aren't supported. // Speed wise Texture barriers > blend second pass > texture copies. - const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend; + const bool blend_second_pass_support = !features.CanSampleFromFB() && no_prim_overlap && is_basic_blend; const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2; // We don't want to enable blend mix if we are doing a second pass, it's useless. blend_mix &= !bmix1_second_pass; @@ -4074,14 +4087,14 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT // Condition 1: Require full sw blend for full barrier. // Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead. 
// Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend. - const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && no_prim_overlap) || m_conf.ps.shuffle; + const bool prefer_sw_blend = (features.CanSampleFromFB() && m_conf.require_full_barrier) || (m_conf.require_one_barrier && no_prim_overlap) || m_conf.ps.shuffle; const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb || accumulation_blend; // Mix of hw/sw blending // Warning no break on purpose // Note: the [[fallthrough]] attribute tell compilers not to complain about not having breaks. bool sw_blending = false; - if (features.texture_barrier) + if (features.CanSampleFromFB()) { const bool blend_requires_barrier = (blend_flag & BLEND_A_MAX) // Impossible blending // Sw blend, either full barrier or one barrier with no overlap. @@ -4169,24 +4182,21 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT } } - if (features.framebuffer_fetch) + // If we have fbfetch, use software blending when we need the fb value for anything else. + // This saves outputting the second color when it's not needed. + if ((features.framebuffer_fetch && (one_barrier || m_conf.require_full_barrier)) || m_conf.ps.rov) { - // If we have fbfetch, use software blending when we need the fb value for anything else. - // This saves outputting the second color when it's not needed. 
- if (one_barrier || m_conf.require_full_barrier) - { - sw_blending = true; - color_dest_blend = false; - accumulation_blend = false; - blend_mix = false; - } + sw_blending = true; + color_dest_blend = false; + accumulation_blend = false; + blend_mix = false; } // Color clip if (COLCLAMP.CLAMP == 0) { bool free_colclip = false; - if (features.framebuffer_fetch) + if (features.framebuffer_fetch || features.raster_order_view) free_colclip = true; else if (features.texture_barrier) free_colclip = no_prim_overlap || blend_non_recursive; @@ -4237,7 +4247,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT if (sw_blending) { GL_INS("PABE mode ENABLED"); - if (features.texture_barrier) + if (features.CanSampleFromFB()) { // Disable hw/sw blend and do pure sw blend with reading the framebuffer. color_dest_blend = false; @@ -4920,7 +4930,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c { m_conf.tex = nullptr; m_conf.ps.tex_is_fb = true; - if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().texture_barrier) + if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().CanSampleFromFB()) m_conf.require_one_barrier = true; else m_conf.require_full_barrier = true; @@ -5074,7 +5084,7 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu const TextureMinMaxResult& tmm) { // Minimum blending or no barriers -> we can't use tex-is-fb. - if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier) + if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().CanSampleFromFB()) { GL_CACHE("Can't use tex-is-fb due to no barriers."); return false; @@ -5272,6 +5282,27 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.rt = rt ? rt->m_texture : nullptr; m_conf.ds = ds ? 
ds->m_texture : nullptr; + // End ROV early so we don't unnecessarily persist it in blending + const GSDevice::FeatureSupport features = g_gs_device->Features(); + if (features.raster_order_view) + { + // get rid of existing ROV when we're changing targets + if (m_rov_color_dst && m_rov_color_dst != m_conf.rt) + { + // RT change, so end ROV + GL_INS("RT change, ending ROV"); + m_rov_color_dst = nullptr; + m_rov_mismatch_count = 0; + EndROVDepth(m_conf.rt, m_conf.ds); + } + if (m_rov_depth_dst && m_conf.ds && (m_rov_depth_dst != m_conf.ds || m_rov_depth_dst == m_conf.tex)) + { + GL_INS("DS change, ending ROV depth"); + m_rov_mismatch_count = 0; + EndROVDepth(m_conf.rt, m_conf.ds); + } + } + // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -5291,8 +5322,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta if (rt) EmulateTextureShuffleAndFbmask(rt, tex); - const GSDevice::FeatureSupport features = g_gs_device->Features(); - if (DATE) { const bool is_overlap_alpha = m_prim_overlap != PRIM_OVERLAP_NO && !(m_cached_ctx.FRAME.FBMSK & 0x80000000); @@ -5440,7 +5469,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // It is way too complex to emulate texture shuffle with DATE, so use accurate path. // No overlap should be triggered on gl/vk only as they support DATE_BARRIER. - if (features.framebuffer_fetch) + if (features.framebuffer_fetch || features.raster_order_view) { // Full DATE is "free" with framebuffer fetch. The barrier gets cleared below. DATE_BARRIER = true; @@ -5654,7 +5683,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // Depth test is always true so it can be executed in 2 passes (no order required) unlike color. // The idea is to compute first the color which is independent of the alpha test. And then do a 2nd // pass to handle the depth based on the alpha test. 
- const bool ate_first_pass = m_cached_ctx.TEST.DoFirstPass(); + bool ate_first_pass = m_cached_ctx.TEST.DoFirstPass(); bool ate_second_pass = m_cached_ctx.TEST.DoSecondPass(); bool ate_RGBA_then_Z = false; bool ate_RGB_then_Z = false; @@ -5730,6 +5759,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta DATE_PRIMID = true; } } + else if (ate_second_pass && features.raster_order_view) + { + GL_INS("Using ROV for AFAIL"); + ate_second_pass = false; + m_conf.require_full_barrier = true; + } } // No point outputting colours if we're just writing depth. @@ -5887,14 +5922,117 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.ps.rta_correction = rt->m_rt_alpha_scale; } - if (features.framebuffer_fetch) + // rs + const GSVector4i hacked_scissor = m_channel_shuffle ? GSVector4i::cxpr(0, 0, 1024, 1024) : m_context->scissor.in; + const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * GSVector4(hacked_scissor)).rintersect(GSVector4i::loadh(rtsize))); + + m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtsize, rtscale)); + m_conf.scissor = (DATE && !DATE_BARRIER) ? m_conf.drawarea : scissor; + + SetupIA(rtscale, sx, sy); + + // do we want ROV for this draw? 
+ bool use_rov = features.raster_order_view && + (m_conf.require_full_barrier || (!features.texture_barrier && m_conf.require_one_barrier)); + + // ROV setup + if (use_rov || m_rov_color_dst) + { + // preserve ROV/sw depth if we're already using it + bool mismatch = false; + if (m_rov_color_dst && !use_rov) + { + GL_INS("ROV not needed, but enabled, so using it (counter=%u)", m_rov_mismatch_count); + use_rov = true; + mismatch = true; + } + + bool use_rov_depth = use_rov && m_conf.ds && m_conf.ps.NeedsROVDepth(); + if (m_rov_depth_dst && !use_rov_depth) + { + GL_INS("SW depth not needed, but enabled, so using it (counter=%u)", m_rov_mismatch_count); + use_rov_depth = (m_conf.ds != nullptr); + mismatch = true; + } + + // update mismatch counter + m_rov_mismatch_count = mismatch ? (m_rov_mismatch_count + 1) : 0; + + if (use_rov) + { + // enable rov color output + m_conf.ps.rov = true; + m_conf.ps.no_color = true; + m_conf.ps.no_color1 = true; + m_rov_color_dst = rt->m_texture; + + // force fbmask on when we're not writing all channels + if (m_conf.colormask.wrgba != 0xF) + { + if (!m_conf.ps.fbmask) + { + m_conf.ps.fbmask = true; + m_conf.cb_ps.FbMask = GSVector4i::zero(); + } + if (!m_conf.colormask.wr) + m_conf.cb_ps.FbMask.r = 0xFF; + if (!m_conf.colormask.wg) + m_conf.cb_ps.FbMask.g = 0xFF; + if (!m_conf.colormask.wb) + m_conf.cb_ps.FbMask.b = 0xFF; + if (!m_conf.colormask.wa) + m_conf.cb_ps.FbMask.a = 0xFF; + } + + // reset blending to sw + // TODO: don't... do this. 
+ m_conf.ps.blend_a = false; + m_conf.ps.blend_b = false; + m_conf.ps.blend_c = false; + m_conf.ps.blend_d = false; + m_conf.ps.blend_hw = false; + m_conf.ps.blend_mix = false; + m_conf.ps.hdr = false; + EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, rt, can_scale_rt_alpha, new_scale_rt_alpha); + + // kill all hw blending (it should be off anyway) + if (m_conf.blend.enable) + DevCon.Warning("HW blend enabled on ROV"); + m_conf.blend = {}; + + // kill two pass atest, we can do it in a single pass + if (m_cached_ctx.TEST.ATE) + { + ate_first_pass = true; + ate_second_pass = false; + m_conf.ps.afail = m_context->TEST.AFAIL; + } + + if (use_rov_depth) + { + // convert hw depth to sw depth + GL_INS("Using SW depth"); + if (!BeginROVDepth(m_conf.ds, m_conf.drawarea)) [[unlikely]] + return; + + m_conf.ps.ztst = m_conf.depth.ztst; + m_conf.ps.zwe = m_conf.depth.zwe; + m_conf.depth.ztst = ZTST_ALWAYS; + m_conf.depth.zwe = false; + m_conf.ds = m_rov_depth_tex.get(); + m_conf.rov_depth = true; + } + } + } + + if (features.framebuffer_fetch || use_rov) { // Intel GPUs on Metal lock up if you try to use DSB and framebuffer fetch at once // We should never need to do that (since using framebuffer fetch means you should be able to do all blending in shader), but sometimes it slips through if (m_conf.require_one_barrier || m_conf.require_full_barrier) pxAssert(!m_conf.blend.enable); - // Barriers aren't needed with fbfetch. + // Barriers aren't needed with fbfetch or ROV. m_conf.require_one_barrier = false; m_conf.require_full_barrier = false; } @@ -5908,15 +6046,6 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.require_one_barrier = true; } - // rs - const GSVector4i hacked_scissor = m_channel_shuffle ? 
GSVector4i::cxpr(0, 0, 1024, 1024) : m_context->scissor.in; - const GSVector4i scissor(GSVector4i(GSVector4(rtscale) * GSVector4(hacked_scissor)).rintersect(GSVector4i::loadh(rtsize))); - - m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtsize, rtscale)); - m_conf.scissor = (DATE && !DATE_BARRIER) ? m_conf.drawarea : scissor; - - SetupIA(rtscale, sx, sy); - if (ate_second_pass) { pxAssert(!env.PABE.PABE); @@ -6004,7 +6133,16 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.drawlist = (m_conf.require_full_barrier && m_vt.m_primclass == GS_SPRITE_CLASS) ? &m_drawlist : nullptr; + // normal draw, no rov g_gs_device->RenderHW(m_conf); + + // if we mismatched too many times, end rov + if (use_rov && m_rov_mismatch_count >= 20) + { + GL_INS("Disabling ROV due to too many mismatches"); + m_rov_color_dst = nullptr; + EndROVDepth(m_conf.rt, ds ? ds->m_texture : nullptr); + } } // If the EE uploaded a new CLUT since the last draw, use that. @@ -6198,6 +6336,152 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDrawAggressive() return CLUTDrawTestResult::CLUTDrawOnCPU; } +bool GSRendererHW::CanContinueROVDepth(GSTexture* ds, const GSVector4i& area) +{ + if (!m_rov_depth_tex || m_rov_depth_dst != ds) + return false; + + // new area is contained? 
+ if (area.x >= m_rov_depth_rect.left && area.y >= m_rov_depth_rect.top && + area.z <= m_rov_depth_rect.right && area.w <= m_rov_depth_rect.bottom) + { + // 2ez + return true; + } + + // TODO: Align B + const GSVector4 ds_sz(GSVector4i(ds->GetSize()).xyxy()); + GSDevice::MultiStretchRect new_rects[4]; + u32 num_new_rects = 0; + const auto add_rect = [&](const GSVector4i& rc) + { + const GSVector4 frc(rc); + new_rects[num_new_rects++] = { + frc / ds_sz, frc, ds, false, 0xf + }; + }; + + const GSVector4i& A = m_rov_depth_rect; + const GSVector4i& B = area; + if (B.top < A.top) + { + // rectangle above A + add_rect(GSVector4i( + std::min(A.left, B.left), B.top, + std::max(A.right, B.right), A.top + )); + } + if (B.bottom > A.bottom) + { + // rectangle below A + add_rect(GSVector4i( + std::min(A.left, B.left), A.bottom, + std::max(A.right, B.right), B.bottom + )); + } + if (B.left < A.left) + { + // rectangle to the left of A + add_rect(GSVector4i( + B.left, A.top, A.left, A.bottom + )); + } + if (B.right > A.right) + { + // rectangle to the right of A + add_rect(GSVector4i( + A.right, A.top, B.right, A.bottom + )); + } + + // expand SW depth area + m_rov_depth_rect = GSVector4i( + std::min(A.left, B.left), std::min(A.top, B.top), + std::max(A.right, B.right), std::max(A.bottom, B.bottom) + ); + + pxAssertRel(num_new_rects > 0, "Has new rects"); + g_gs_device->DrawMultiStretchRects(new_rects, num_new_rects, m_rov_depth_tex.get(), ShaderConvert::COPY); + return true; +} + +bool GSRendererHW::BeginROVDepth(GSTexture* ds, const GSVector4i& area) +{ + // align the area to 128x128, hopefully reducing the number of times we need to do this + const int alignment = 128 * GSConfig.UpscaleMultiplier; + const GSVector4i aligned_area(area.ralign(GSVector2i(alignment, alignment))); + + if (CanContinueROVDepth(ds, aligned_area)) + return true; + + EndROVDepth(nullptr, nullptr); + + if (!m_rov_depth_tex || m_rov_depth_tex->GetSize() != ds->GetSize()) + { + if (m_rov_depth_tex) + 
g_gs_device->Recycle(m_rov_depth_tex.release()); + + m_rov_depth_tex = std::unique_ptr( + g_gs_device->CreateRenderTarget(ds->GetWidth(), ds->GetHeight(), GSTexture::Format::ColorDepth, false)); + if (!m_rov_depth_tex) [[unlikely]] + { + GL_INS("ERROR: Failed to allocate memory for ROV depth, skipping."); + return false; + } + } + + g_gs_device->InvalidateRenderTarget(m_rov_depth_tex.get()); + + const GSVector4 farea(aligned_area); + const GSVector4 sRect(farea / GSVector4(ds->GetWidth(), ds->GetHeight(), ds->GetWidth(), ds->GetHeight())); + g_gs_device->StretchRect(ds, sRect, m_rov_depth_tex.get(), farea, ShaderConvert::COPY, false); + m_rov_depth_dst = ds; + m_rov_depth_rect = aligned_area; + return true; +} + +void GSRendererHW::EndROVDepth(GSTexture* new_rt, GSTexture* new_ds) +{ + if (!m_rov_depth_dst) + return; + + // TODO: Leave RT bound to avoid a possible render pass restart after this. + const GSVector4 farea(m_rov_depth_rect); + g_gs_device->StretchRect(m_rov_depth_tex.get(), + farea / GSVector4(m_rov_depth_dst->GetWidth(), m_rov_depth_dst->GetHeight(), m_rov_depth_dst->GetWidth(), + m_rov_depth_dst->GetHeight()), + m_rov_depth_dst, farea, ShaderConvert::DEPTH_COPY, false); + m_rov_depth_dst = nullptr; + m_rov_depth_rect = GSVector4i::zero(); +} + +void GSRendererHW::FlushROVDepthForTexture(GSTexture* ds, bool discard) +{ + if (m_rov_depth_dst != ds) + return; + + if (!discard) + { + GL_INS("Flushing ROV depth: %d,%d => %d,%d", m_rov_depth_rect.left, m_rov_depth_rect.top, + m_rov_depth_rect.right, m_rov_depth_rect.bottom); + + const GSVector4 farea(m_rov_depth_rect); + g_gs_device->StretchRect(m_rov_depth_tex.get(), + farea / GSVector4(m_rov_depth_dst->GetWidth(), m_rov_depth_dst->GetHeight(), m_rov_depth_dst->GetWidth(), + m_rov_depth_dst->GetHeight()), + m_rov_depth_dst, farea, ShaderConvert::DEPTH_COPY, false); + m_rov_depth_dst = nullptr; + m_rov_depth_rect = GSVector4i::zero(); + } + else + { + GL_INS("Discarding ROV depth."); + } + + 
m_rov_depth_dst = nullptr; + m_rov_depth_rect = GSVector4i::zero(); +} + bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex) { // Master enable. @@ -7212,7 +7496,7 @@ void GSRendererHW::EndHLEHardwareDraw(bool force_copy_on_hazard /* = false */) { const GSDevice::FeatureSupport features = g_gs_device->Features(); - if (!force_copy_on_hazard && config.tex == config.rt && features.texture_barrier) + if (!force_copy_on_hazard && config.tex == config.rt && features.CanSampleFromFB()) { // Sample RT 1:1. config.require_one_barrier = !features.framebuffer_fetch; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 78a123a94483c..3950cdc6a3150 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -124,6 +124,10 @@ class GSRendererHW : public GSRenderer bool IsUsingCsInBlend(); bool IsUsingAsInBlend(); + bool CanContinueROVDepth(GSTexture* ds, const GSVector4i& area); + bool BeginROVDepth(GSTexture* ds, const GSVector4i& area); + void EndROVDepth(GSTexture* new_rt, GSTexture* new_ds); + // We modify some of the context registers to optimize away unnecessary operations. // Instead of messing with the real context, we copy them and use those instead. struct HWCachedCtx @@ -180,6 +184,12 @@ class GSRendererHW : public GSRenderer GSHWDrawConfig m_conf = {}; HWCachedCtx m_cached_ctx; + std::unique_ptr m_rov_depth_tex; + GSTexture* m_rov_color_dst = nullptr; + GSTexture* m_rov_depth_dst = nullptr; + GSVector4i m_rov_depth_rect{}; + u32 m_rov_mismatch_count = 0; + // software sprite renderer state std::vector m_sw_vertex_buffer; std::unique_ptr m_sw_texture[7 + 1]; @@ -246,4 +256,7 @@ class GSRendererHW : public GSRenderer /// Submits a previously set up HLE hardware draw, copying any textures as needed if there's hazards. void EndHLEHardwareDraw(bool force_copy_on_hazard = false); + + /// Finishes ROV depth if it matches. 
+ void FlushROVDepthForTexture(GSTexture* ds, bool discard); }; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index a80342499d775..07a14aa5e0e55 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -981,6 +981,9 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c src->m_valid_rect = dst->m_valid; src->m_end_block = dst->m_end_block; + if (dst->m_type == DepthStencil) + GSRendererHW::GetInstance()->FlushROVDepthForTexture(dst->GetTexture(), false); + if (inside_target) { // Need to set it up as a region target. @@ -1980,6 +1983,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (dst->m_scale != scale) { + if (dst->m_type == DepthStencil) + GSRendererHW::GetInstance()->FlushROVDepthForTexture(dst->GetTexture(), false); + calcRescale(dst); GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, clear) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, clear); @@ -2024,6 +2030,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid = dst_match->m_valid; dst->UpdateValidity(dst_match->m_valid); + GSRendererHW::GetInstance()->FlushROVDepthForTexture(dst_match->GetTexture(), false); + if (!CopyRGBFromDepthToColor(dst, dst_match)) { // Needed new texture and memory allocation failed. 
@@ -2227,6 +2235,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } else { + if (dst_match->m_type == DepthStencil) + GSRendererHW::GetInstance()->FlushROVDepthForTexture(dst_match->GetTexture(), false); + // The old target's going to get invalidated (at least until we handle concurrent frame+depth at the same BP), // so just move the dirty rects across, unless the format is diffent, in which case we need to update it. if (dst->m_TEX0.PSM != dst_match->m_TEX0.PSM) @@ -6052,6 +6063,9 @@ GSTextureCache::Target::~Target() pxAssert(!m_shared_texture); if (m_texture) { + if (m_type == DepthStencil) + GSRendererHW::GetInstance()->FlushROVDepthForTexture(m_texture, true); + g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } @@ -6102,6 +6116,9 @@ void GSTextureCache::Target::Update(bool cannot_scale) return; } + if (m_type == DepthStencil) + GSRendererHW::GetInstance()->FlushROVDepthForTexture(m_texture, false); + const GSVector4i t_offset(total_rect.xyxy()); const GSVector4i t_size(total_rect - t_offset); const GSVector4 t_sizef(t_size.zwzw()); @@ -6394,6 +6411,9 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca return false; } + if (m_type == DepthStencil) + GSRendererHW::GetInstance()->FlushROVDepthForTexture(m_texture, false); + // Only need to copy if it's been written to. 
if (m_texture->GetState() == GSTexture::State::Dirty) { diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index b9240682c1e3f..16ef697bd62d8 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -492,6 +492,7 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format) case GSTexture::Format::Color: return MTLPixelFormatRGBA8Unorm; case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA16Unorm; case GSTexture::Format::DepthStencil: return MTLPixelFormatDepth32Float_Stencil8; + case GSTexture::Format::ColorDepth: return MTLPixelFormatR32Float; case GSTexture::Format::Invalid: return MTLPixelFormatInvalid; case GSTexture::Format::BC1: return MTLPixelFormatBC1_RGBA; case GSTexture::Format::BC2: return MTLPixelFormatBC2_RGBA; diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp index 1ded1c69f9baf..ef68734eb1892 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp @@ -79,6 +79,14 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format m_int_shift = 3; break; + // 1 channel float + case Format::ColorDepth: + gl_fmt = GL_R32F; + m_int_format = GL_RED; + m_int_type = GL_FLOAT; + m_int_shift = 2; + break; + // Depth buffer case Format::DepthStencil: { diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 72ec313468f37..5d1aa482cceac 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -43,6 +43,8 @@ enum : u32 TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, }; +// TODO: THIS SUCKS +const VkRenderPass GSDeviceVK::DYNAMIC_RENDERING_RENDER_PASS = reinterpret_cast(-1); #ifdef ENABLE_OGL_DEBUG static u32 s_debug_scope_depth = 0; @@ -378,6 +380,13 @@ bool GSDeviceVK::SelectDeviceExtensions(ExtensionList* extension_list, bool enab 
m_optional_extensions.vk_ext_line_rasterization = SupportsExtension(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, require_line_rasterization); m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false); + m_optional_extensions.vk_ext_fragment_shader_interlock = + SupportsExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, false); + m_optional_extensions.vk_khr_dynamic_rendering = + SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) && + SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, + false) && // Not actually needed by us, but needed for dynamic rendering. + SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false); // glslang generates debug info instructions before phi nodes at the beginning of blocks when non-semantic debug info // is enabled, triggering errors by spirv-val. Gate it by an environment variable if you want source debugging until @@ -573,6 +582,10 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT}; VkPhysicalDeviceLineRasterizationFeaturesEXT line_rasterization_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT}; + VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamic_rendering_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR}; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT}; if (m_optional_extensions.vk_ext_provoking_vertex) { @@ -589,6 +602,16 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess = VK_TRUE; Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature); } + if (m_optional_extensions.vk_khr_dynamic_rendering) + { + 
dynamic_rendering_feature.dynamicRendering = VK_TRUE; + Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature); + } + if (m_optional_extensions.vk_ext_fragment_shader_interlock) + { + fragment_shader_interlock_feature.fragmentShaderPixelInterlock = VK_TRUE; + Vulkan::AddPointerToChain(&device_info, &fragment_shader_interlock_feature); + } VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); if (res != VK_SUCCESS) @@ -654,6 +677,10 @@ bool GSDeviceVK::ProcessDeviceExtensions() VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT}; VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT}; + VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamic_rendering_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR}; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT}; // add in optional feature structs if (m_optional_extensions.vk_ext_provoking_vertex) @@ -662,6 +689,10 @@ bool GSDeviceVK::ProcessDeviceExtensions() Vulkan::AddPointerToChain(&features2, &line_rasterization_feature); if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature); + if (m_optional_extensions.vk_khr_dynamic_rendering) + Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature); + if (m_optional_extensions.vk_ext_fragment_shader_interlock) + Vulkan::AddPointerToChain(&features2, &fragment_shader_interlock_feature); // query vkGetPhysicalDeviceFeatures2(m_physical_device, &features2); @@ -670,6 +701,9 @@ bool GSDeviceVK::ProcessDeviceExtensions() m_optional_extensions.vk_ext_provoking_vertex &= (provoking_vertex_features.provokingVertexLast == VK_TRUE); 
m_optional_extensions.vk_ext_rasterization_order_attachment_access &= (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE); + m_optional_extensions.vk_ext_fragment_shader_interlock &= + (fragment_shader_interlock_feature.fragmentShaderPixelInterlock == VK_TRUE); + m_optional_extensions.vk_khr_dynamic_rendering = (dynamic_rendering_feature.dynamicRendering == VK_TRUE); VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; @@ -751,6 +785,10 @@ bool GSDeviceVK::ProcessDeviceExtensions() m_optional_extensions.vk_ext_full_screen_exclusive ? "supported" : "NOT supported"); Console.WriteLn("VK_KHR_driver_properties is %s", m_optional_extensions.vk_khr_driver_properties ? "supported" : "NOT supported"); + Console.WriteLn("VK_EXT_fragment_shader_interlock is %s", + m_optional_extensions.vk_ext_fragment_shader_interlock ? "supported" : "NOT supported"); + Console.WriteLn("VK_KHR_dynamic_rendering is %s", + m_optional_extensions.vk_khr_dynamic_rendering ? "supported" : "NOT supported"); return true; } @@ -2650,6 +2688,11 @@ bool GSDeviceVK::CheckFeatures() m_features.provoking_vertex_last = m_optional_extensions.vk_ext_provoking_vertex; m_features.vs_expand = !GSConfig.DisableVertexShaderExpand; + // TODO: Autoselection + m_features.raster_order_view = m_optional_extensions.vk_ext_fragment_shader_interlock && + m_optional_extensions.vk_khr_dynamic_rendering && + GSConfig.OverrideRasterizerOrderViews != 0; + if (!m_features.texture_barrier) Console.Warning("Texture buffers are disabled. This may break some graphical effects."); @@ -2667,6 +2710,9 @@ bool GSDeviceVK::CheckFeatures() // Buggy drivers with broken barriers probably have no chance using GENERAL layout for depth either... m_features.test_and_sample_depth = m_features.texture_barrier; + // Don't bother with ROV/FSI if we have fbfetch, it'll be faster. 
+ m_features.raster_order_view &= !m_features.framebuffer_fetch; + // Use D32F depth instead of D32S8 when we have framebuffer fetch. m_features.stencil_buffer &= !m_features.framebuffer_fetch; @@ -2744,6 +2790,7 @@ VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const VK_FORMAT_R8G8B8A8_UNORM, // Color VK_FORMAT_R16G16B16A16_UNORM, // HDRColor VK_FORMAT_D32_SFLOAT_S8_UINT, // DepthStencil + VK_FORMAT_R32_SFLOAT, // ColorDepth VK_FORMAT_R8_UNORM, // UNorm8 VK_FORMAT_R16_UINT, // UInt16 VK_FORMAT_R32_UINT, // UInt32 @@ -2884,9 +2931,15 @@ void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), int(dRect.right - dRect.left), int(dRect.bottom - dRect.top)); - DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, - dTex ? m_convert[static_cast(shader)] : m_present[static_cast(shader)], linear, - ShaderConvertWriteMask(shader) == 0xf); + // TODO: make less hacky + const bool rov_depth_writeback = (sTex->GetFormat() == GSTexture::Format::ColorDepth); + const VkPipeline pipeline = (dTex->GetFormat() == GSTexture::Format::ColorDepth) ? + m_rov_depth_begin_pipeline : + m_convert[static_cast(shader)]; + + DoStretchRect(static_cast(sTex), sRect, + static_cast(dTex), dRect, pipeline, linear, + ShaderConvertWriteMask(shader) == 0xf && !rov_depth_writeback); } void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, @@ -2917,6 +2970,8 @@ void GSDeviceVK::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* void GSDeviceVK::DrawMultiStretchRects( const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) { + // TODO: Move this over to dynamic rendering. 
+ GSTexture* last_tex = rects[0].src; bool last_linear = rects[0].linear; u8 last_wmask = rects[0].wmask.wrgba; @@ -3023,7 +3078,10 @@ void GSDeviceVK::DoMultiStretchRects( pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); int rta_bit = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; SetPipeline( - (rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba | rta_bit] : m_convert[static_cast(shader)]); + (dTex->GetFormat() == GSTexture::Format::ColorDepth) ? + m_rov_depth_begin_pipeline : + (rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba | rta_bit] : + m_convert[static_cast(shader)]); if (ApplyUtilityState()) DrawIndexedPrimitive(); @@ -3632,6 +3690,9 @@ static void AddShaderHeader(std::stringstream& ss) ss << "#extension GL_EXT_samplerless_texture_functions : require\n"; ss << "#extension GL_ARB_shader_draw_parameters : require\n"; + if (features.raster_order_view) + ss << "#extension GL_ARB_fragment_shader_interlock : require\n"; + if (!features.texture_barrier) ss << "#define DISABLE_TEXTURE_BARRIER 1\n"; } @@ -3800,6 +3861,25 @@ bool GSDeviceVK::CreatePipelineLayouts() if ((m_tfx_pipeline_layout = plb.Create(dev)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(dev, m_tfx_pipeline_layout, "TFX pipeline layout"); + + if (m_features.raster_order_view) + { + dslb.SetPushFlag(); + dslb.AddBinding(TFX_TEXTURE_TEXTURE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(TFX_TEXTURE_PALETTE, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(TFX_TEXTURE_RT, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(TFX_TEXTURE_DS, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_tfx_rov_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(dev, m_tfx_rov_ds_layout, "TFX ROV descriptor layout"); + + 
plb.AddDescriptorSet(m_tfx_ubo_ds_layout); + plb.AddDescriptorSet(m_tfx_rov_ds_layout); + if ((m_tfx_rov_pipeline_layout = plb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(dev, m_tfx_rov_pipeline_layout, "TFX ROV pipeline layout"); + } + return true; } @@ -4027,6 +4107,18 @@ bool GSDeviceVK::CompileConvertPipelines() Vulkan::SetObjectName(m_device, m_color_copy[j], "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u); } + + // ROV depth begin + gpb.SetRenderPass(GetRenderPass(LookupNativeFormat(GSTexture::Format::ColorDepth), + VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE), + 0); + gpb.SetNoBlendingState(); + m_rov_depth_begin_pipeline = + gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_rov_depth_begin_pipeline) + return false; + Vulkan::SetObjectName(m_device, m_rov_depth_begin_pipeline, "Convert depth to ROV"); } else if (i == ShaderConvert::HDR_INIT || i == ShaderConvert::HDR_RESOLVE) { @@ -4555,8 +4647,8 @@ bool GSDeviceVK::DoCAS( // only happening once a frame, so the update isn't a huge deal. Vulkan::DescriptorSetUpdateBuilder dsub; - dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, 0, sTexVK->GetView(), sTexVK->GetVkLayout()); - dsub.AddStorageImageDescriptorWrite(VK_NULL_HANDLE, 1, dTexVK->GetView(), dTexVK->GetVkLayout()); + dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, 0, sTexVK->GetView(), VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, sTexVK->GetVkLayout()); + dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, 1, dTexVK->GetView(), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, dTexVK->GetVkLayout()); dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_cas_pipeline_layout, 0, false); // the actual meat and potatoes! only four commands. 
@@ -4629,6 +4721,8 @@ void GSDeviceVK::DestroyResources() vkDestroyPipeline(m_device, m_date_image_setup_pipelines[ds][datm], nullptr); } } + if (m_rov_depth_begin_pipeline != VK_NULL_HANDLE) + vkDestroyPipeline(m_device, m_rov_depth_begin_pipeline, nullptr); if (m_fxaa_pipeline != VK_NULL_HANDLE) vkDestroyPipeline(m_device, m_fxaa_pipeline, nullptr); if (m_shadeboost_pipeline != VK_NULL_HANDLE) @@ -4661,10 +4755,14 @@ void GSDeviceVK::DestroyResources() if (m_expand_index_buffer != VK_NULL_HANDLE) vmaDestroyBuffer(m_allocator, m_expand_index_buffer, m_expand_index_buffer_allocation); + if (m_tfx_rov_pipeline_layout != VK_NULL_HANDLE) + vkDestroyPipelineLayout(m_device, m_tfx_rov_pipeline_layout, nullptr); if (m_tfx_pipeline_layout != VK_NULL_HANDLE) vkDestroyPipelineLayout(m_device, m_tfx_pipeline_layout, nullptr); if (m_tfx_texture_ds_layout != VK_NULL_HANDLE) vkDestroyDescriptorSetLayout(m_device, m_tfx_texture_ds_layout, nullptr); + if (m_tfx_rov_ds_layout != VK_NULL_HANDLE) + vkDestroyDescriptorSetLayout(m_device, m_tfx_rov_ds_layout, nullptr); if (m_tfx_ubo_ds_layout != VK_NULL_HANDLE) vkDestroyDescriptorSetLayout(m_device, m_tfx_ubo_ds_layout, nullptr); if (m_utility_pipeline_layout != VK_NULL_HANDLE) @@ -4799,6 +4897,9 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb); AddMacro(ss, "PS_NO_COLOR", sel.no_color); AddMacro(ss, "PS_NO_COLOR1", sel.no_color1); + AddMacro(ss, "PS_ROV", sel.rov); + AddMacro(ss, "PS_ZTST", sel.ztst); + AddMacro(ss, "PS_ZWE", sel.zwe); ss << m_tfx_source; VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str()); @@ -4835,21 +4936,36 @@ VkPipeline GSDeviceVK::CreateTFXPipeline(const PipelineSelector& p) SetPipelineProvokingVertex(m_features, gpb); // Common state - gpb.SetPipelineLayout(m_tfx_pipeline_layout); - if (IsDATEModePrimIDInit(p.ps.date)) + if (!p.IsROV()) { - // DATE image prepass - 
gpb.SetRenderPass(m_date_image_setup_render_passes[p.ds][0], 0); + gpb.SetPipelineLayout(m_tfx_pipeline_layout); + if (IsDATEModePrimIDInit(p.ps.date)) + { + // DATE image prepass + gpb.SetRenderPass(m_date_image_setup_render_passes[p.ds][0], 0); + } + else + { + gpb.SetRenderPass( + GetTFXRenderPass(p.rt, p.ds, p.ps.hdr, p.dss.date, + p.IsRTFeedbackLoop(), p.IsTestingAndSamplingDepth(), + p.rt ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, + p.ds ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE), + 0); + } } else { - gpb.SetRenderPass( - GetTFXRenderPass(p.rt, p.ds, p.ps.hdr, p.dss.date, - p.IsRTFeedbackLoop(), p.IsTestingAndSamplingDepth(), - p.rt ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, - p.ds ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE), - 0); + gpb.SetPipelineLayout(m_tfx_rov_pipeline_layout); + gpb.SetDynamicRendering(); + if (p.ds) + { + const VkFormat depth_format = LookupNativeFormat(GSTexture::Format::DepthStencil); + gpb.SetDynamicRenderingDepthAttachment( + depth_format, m_features.stencil_buffer ? depth_format : VK_FORMAT_UNDEFINED); + } } + gpb.SetPrimitiveTopology(topology_lookup[p.topology]); gpb.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); if (m_optional_extensions.vk_ext_line_rasterization && @@ -4955,7 +5071,7 @@ bool GSDeviceVK::BindDrawPipeline(const PipelineSelector& p) SetPipeline(pipeline); - return ApplyTFXState(); + return ApplyTFXState(p.IsROV() ? 
PipelineLayout::ROVTFX : PipelineLayout::TFX); } void GSDeviceVK::InitializeState() @@ -5089,6 +5205,7 @@ void GSDeviceVK::InvalidateCachedState() m_current_pipeline_layout = PipelineLayout::Undefined; m_tfx_texture_descriptor_set = VK_NULL_HANDLE; m_tfx_rt_descriptor_set = VK_NULL_HANDLE; + m_tfx_image_descriptor_set = VK_NULL_HANDLE; m_utility_descriptor_set = VK_NULL_HANDLE; } @@ -5211,11 +5328,6 @@ void GSDeviceVK::UnbindTexture(GSTextureVK* tex) } } -bool GSDeviceVK::InRenderPass() -{ - return m_current_render_pass != VK_NULL_HANDLE; -} - void GSDeviceVK::BeginRenderPass(VkRenderPass rp, const GSVector4i& rect) { if (m_current_render_pass != VK_NULL_HANDLE) @@ -5267,10 +5379,14 @@ void GSDeviceVK::EndRenderPass() if (m_current_render_pass == VK_NULL_HANDLE) return; + VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + if (m_current_render_pass != DYNAMIC_RENDERING_RENDER_PASS) + vkCmdEndRenderPass(cmdbuf); + else + vkCmdEndRenderingKHR(cmdbuf); + m_current_render_pass = VK_NULL_HANDLE; g_perfmon.Put(GSPerfMon::RenderPasses, 1); - - vkCmdEndRenderPass(GetCurrentCommandBuffer()); } void GSDeviceVK::SetViewport(const VkViewport& viewport) @@ -5334,9 +5450,9 @@ __ri void GSDeviceVK::ApplyBaseState(u32 flags, VkCommandBuffer cmdbuf) vkCmdSetLineWidth(cmdbuf, m_current_line_width); } -bool GSDeviceVK::ApplyTFXState(bool already_execed) +bool GSDeviceVK::ApplyTFXState(PipelineLayout layout, bool already_execed) { - if (m_current_pipeline_layout == PipelineLayout::TFX && m_dirty_flags == 0) + if (m_current_pipeline_layout == layout && m_dirty_flags == 0) return true; const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); @@ -5356,7 +5472,7 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed) } ExecuteCommandBufferAndRestartRenderPass(false, "Ran out of vertex uniform space"); - return ApplyTFXState(true); + return ApplyTFXState(layout, true); } std::memcpy(m_vertex_uniform_stream_buffer.GetCurrentHostPointer(), &m_vs_cb_cache, sizeof(m_vs_cb_cache)); @@ -5377,7 
+5493,7 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed) } ExecuteCommandBufferAndRestartRenderPass(false, "Ran out of pixel uniform space"); - return ApplyTFXState(true); + return ApplyTFXState(layout, true); } std::memcpy(m_fragment_uniform_stream_buffer.GetCurrentHostPointer(), &m_ps_cb_cache, sizeof(m_ps_cb_cache)); @@ -5387,22 +5503,26 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed) } Vulkan::DescriptorSetUpdateBuilder dsub; - if (m_current_pipeline_layout != PipelineLayout::TFX) + if (m_current_pipeline_layout != layout) { - m_current_pipeline_layout = PipelineLayout::TFX; + m_current_pipeline_layout = layout; flags |= DIRTY_FLAG_TFX_UBO | DIRTY_FLAG_TFX_TEXTURES; - // Clear out the RT binding if feedback loop isn't on, because it'll be in the wrong state and make - // the validation layer cranky. Not a big deal since we need to write it anyway. - const GSTextureVK::Layout rt_tex_layout = m_tfx_textures[TFX_TEXTURE_RT]->GetLayout(); - if (rt_tex_layout != GSTextureVK::Layout::FeedbackLoop && rt_tex_layout != GSTextureVK::Layout::ShaderReadOnly) - m_tfx_textures[TFX_TEXTURE_RT] = m_null_texture.get(); + if (layout != PipelineLayout::ROVTFX) + { + // Clear out the RT binding if feedback loop isn't on, because it'll be in the wrong state and make + // the validation layer cranky. Not a big deal since we need to write it anyway. + const GSTextureVK::Layout rt_tex_layout = m_tfx_textures[TFX_TEXTURE_RT]->GetLayout(); + if (rt_tex_layout != GSTextureVK::Layout::FeedbackLoop && rt_tex_layout != GSTextureVK::Layout::ShaderReadOnly) + m_tfx_textures[TFX_TEXTURE_RT] = m_null_texture.get(); + } } + const VkPipelineLayout pipeline_layout = (layout == PipelineLayout::ROVTFX) ? m_tfx_rov_pipeline_layout : m_tfx_pipeline_layout; if (flags & DIRTY_FLAG_TFX_UBO) { // Still need to bind the UBO descriptor set. 
- vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, 0, 1, + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_tfx_ubo_descriptor_set, NUM_TFX_DYNAMIC_OFFSETS, m_tfx_dynamic_offsets.data()); } @@ -5417,11 +5537,12 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed) if (flags & DIRTY_FLAG_TFX_TEXTURE_PALETTE) { dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PALETTE, - m_tfx_textures[TFX_TEXTURE_PALETTE]->GetView(), m_tfx_textures[TFX_TEXTURE_PALETTE]->GetVkLayout()); + m_tfx_textures[TFX_TEXTURE_PALETTE]->GetView(), VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + m_tfx_textures[TFX_TEXTURE_PALETTE]->GetVkLayout()); } if (flags & DIRTY_FLAG_TFX_TEXTURE_RT) { - if (m_features.texture_barrier) + if (layout == PipelineLayout::TFX && m_features.texture_barrier) { dsub.AddInputAttachmentDescriptorWrite( VK_NULL_HANDLE, TFX_TEXTURE_RT, m_tfx_textures[TFX_TEXTURE_RT]->GetView(), VK_IMAGE_LAYOUT_GENERAL); @@ -5429,16 +5550,18 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed) else { dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_RT, m_tfx_textures[TFX_TEXTURE_RT]->GetView(), + (layout == PipelineLayout::ROVTFX) ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, m_tfx_textures[TFX_TEXTURE_RT]->GetVkLayout()); } } if (flags & DIRTY_FLAG_TFX_TEXTURE_PRIMID) { - dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PRIMID, - m_tfx_textures[TFX_TEXTURE_PRIMID]->GetView(), m_tfx_textures[TFX_TEXTURE_PRIMID]->GetVkLayout()); + dsub.AddImageDescriptorWrite(VK_NULL_HANDLE, TFX_TEXTURE_PRIMID, m_tfx_textures[TFX_TEXTURE_PRIMID]->GetView(), + (layout == PipelineLayout::ROVTFX) ? 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + m_tfx_textures[TFX_TEXTURE_PRIMID]->GetVkLayout()); } - dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_tfx_pipeline_layout, TFX_DESCRIPTOR_SET_TEXTURES); + dsub.PushUpdate(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, TFX_DESCRIPTOR_SET_TEXTURES); } ApplyBaseState(flags, cmdbuf); @@ -5726,7 +5849,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // HDR requires blitting. EndRenderPass(); } - else if (InRenderPass() && (m_current_render_target == draw_rt || m_current_depth_target == draw_ds)) + else if (InRenderPass() && ((m_current_render_pass == DYNAMIC_RENDERING_RENDER_PASS) == pipe.IsROV()) && + (m_current_render_target == draw_rt || m_current_depth_target == draw_ds)) { // avoid restarting the render pass just to switch from rt+depth to rt and vice versa // keep the depth even if doing HDR draws, because the next draw will probably re-enable depth @@ -5758,56 +5882,156 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) const bool skip_first_barrier = (draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.hdr && !IsDeviceAMD()); - OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast(pipe.feedback_loop_flags)); - if (pipe.IsRTFeedbackLoop()) + if (!pipe.IsROV()) { - pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled"); - PSSetShaderResource(2, draw_rt, false); + OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast(pipe.feedback_loop_flags)); - // If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor. - // Otherwise, we might have a previous descriptor left over, that has the RT in a different state. - m_dirty_flags |= (skip_first_barrier ? 
static_cast(DIRTY_FLAG_TFX_TEXTURE_RT) : 0); - } + if (pipe.IsRTFeedbackLoop()) + { + pxAssertMsg(m_features.texture_barrier, "Texture barriers enabled"); + PSSetShaderResource(2, draw_rt, false); - // Begin render pass if new target or out of the area. - if (!InRenderPass()) + // If this is the first draw to the target as a feedback loop, make sure we re-generate the texture descriptor. + // Otherwise, we might have a previous descriptor left over, that has the RT in a different state. + m_dirty_flags |= (skip_first_barrier ? static_cast(DIRTY_FLAG_TFX_TEXTURE_RT) : 0); + } + + // Begin render pass if new target or out of the area. + if (!InRenderPass()) + { + const VkAttachmentLoadOp rt_op = GetLoadOpForTexture(draw_rt); + const VkAttachmentLoadOp ds_op = GetLoadOpForTexture(draw_ds); + const VkRenderPass rp = GetTFXRenderPass(pipe.rt, pipe.ds, pipe.ps.hdr, + config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Stencil, pipe.IsRTFeedbackLoop(), + pipe.IsTestingAndSamplingDepth(), rt_op, ds_op); + const bool is_clearing_rt = (rt_op == VK_ATTACHMENT_LOAD_OP_CLEAR || ds_op == VK_ATTACHMENT_LOAD_OP_CLEAR); + + // Only draw to the active area of the HDR target. Except when depth is cleared, we need to use the full + // buffer size, otherwise it'll only clear the draw part of the depth buffer. + const GSVector4i render_area = (pipe.ps.hdr && ds_op != VK_ATTACHMENT_LOAD_OP_CLEAR) ? config.drawarea : + GSVector4i::loadh(rtsize); + + if (is_clearing_rt) + { + // when we're clearing, we set the draw area to the whole fb, otherwise part of it will be undefined + alignas(16) VkClearValue cvs[2]; + u32 cv_count = 0; + if (draw_rt) + { + GSVector4 clear_color = draw_rt->GetUNormClearColor(); + if (pipe.ps.hdr) + { + // Denormalize clear color for HDR. 
+ clear_color *= GSVector4::cxpr(255.0f / 65535.0f, 255.0f / 65535.0f, 255.0f / 65535.0f, 1.0f); + } + GSVector4::store(&cvs[cv_count++].color, clear_color); + } + if (draw_ds) + cvs[cv_count++].depthStencil = {draw_ds->GetClearDepth(), 0}; + + BeginClearRenderPass(rp, render_area, cvs, cv_count); + } + else + { + BeginRenderPass(rp, render_area); + } + } + } + else { - const VkAttachmentLoadOp rt_op = GetLoadOpForTexture(draw_rt); - const VkAttachmentLoadOp ds_op = GetLoadOpForTexture(draw_ds); - const VkRenderPass rp = GetTFXRenderPass(pipe.rt, pipe.ds, pipe.ps.hdr, - config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Stencil, pipe.IsRTFeedbackLoop(), - pipe.IsTestingAndSamplingDepth(), rt_op, ds_op); - const bool is_clearing_rt = (rt_op == VK_ATTACHMENT_LOAD_OP_CLEAR || ds_op == VK_ATTACHMENT_LOAD_OP_CLEAR); + if (draw_rt) + { + if (draw_rt->GetState() == GSTexture::State::Cleared) + { + EndRenderPass(); + draw_rt->CommitClear(m_current_command_buffer); + } + draw_rt->SetState(GSTexture::State::Dirty); - // Only draw to the active area of the HDR target. Except when depth is cleared, we need to use the full - // buffer size, otherwise it'll only clear the draw part of the depth buffer. - const GSVector4i render_area = (pipe.ps.hdr && ds_op != VK_ATTACHMENT_LOAD_OP_CLEAR) ? 
config.drawarea : - GSVector4i::loadh(rtsize); + if (draw_rt->GetLayout() != GSTextureVK::Layout::ReadWriteImage) + { + EndRenderPass(); + draw_rt->TransitionToLayout(GSTextureVK::Layout::ReadWriteImage); + } - if (is_clearing_rt) + PSSetShaderResource(TFX_TEXTURE_RT, draw_rt, false); + } + + GSTextureVK* bind_ds = draw_ds; + if (draw_ds) { - // when we're clearing, we set the draw area to the whole fb, otherwise part of it will be undefined - alignas(16) VkClearValue cvs[2]; - u32 cv_count = 0; - if (draw_rt) + if (config.rov_depth) { - GSVector4 clear_color = draw_rt->GetUNormClearColor(); - if (pipe.ps.hdr) + if (draw_ds->GetState() == GSTexture::State::Cleared) { - // Denormalize clear color for HDR. - clear_color *= GSVector4::cxpr(255.0f / 65535.0f, 255.0f / 65535.0f, 255.0f / 65535.0f, 1.0f); + EndRenderPass(); + draw_ds->CommitClear(m_current_command_buffer); } - GSVector4::store(&cvs[cv_count++].color, clear_color); - } - if (draw_ds) - cvs[cv_count++].depthStencil = {draw_ds->GetClearDepth(), 0}; + draw_ds->SetState(GSTexture::State::Dirty); - BeginClearRenderPass(rp, render_area, cvs, cv_count); + if (draw_ds->GetLayout() != GSTextureVK::Layout::ReadWriteImage) + { + EndRenderPass(); + draw_ds->TransitionToLayout(GSTextureVK::Layout::ReadWriteImage); + } + PSSetShaderResource(TFX_TEXTURE_DS, draw_ds, false); + bind_ds = nullptr; + } + else + { + // Need to break the render pass to clear. We could use vkCmdClearAttachments(), but nvidia... + if (draw_ds->GetState() != GSTexture::State::Dirty) + { + if (draw_ds->GetState() == GSTexture::State::Cleared) + EndRenderPass(); + else + draw_ds->SetState(GSTexture::State::Dirty); + } + } } - else + + // Stop the debug layer getting cranky about a previous unused binding. 
+ if (!config.rov_depth && m_current_pipeline_layout != PipelineLayout::ROVTFX) + m_tfx_textures[TFX_TEXTURE_DS] = m_null_texture.get(); + + if (m_current_render_pass != DYNAMIC_RENDERING_RENDER_PASS || m_current_depth_target != bind_ds || + (bind_ds && bind_ds->GetState() == GSTexture::State::Cleared)) { - BeginRenderPass(rp, render_area); + EndRenderPass(); + + m_current_framebuffer_feedback_loop = static_cast( + pipe.feedback_loop_flags & (bind_ds ? (~FeedbackLoopFlag_ReadDS) : FeedbackLoopFlag_None)); + + VkRenderingAttachmentInfoKHR di; + if (bind_ds) + { + bind_ds->TransitionToLayout((m_current_framebuffer_feedback_loop & FeedbackLoopFlag_ReadDS) ? + GSTextureVK::Layout::General : + GSTextureVK::Layout::DepthStencilAttachment); + di = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, nullptr, bind_ds->GetView(), + bind_ds->GetVkLayout(), VK_RESOLVE_MODE_NONE, VK_NULL_HANDLE, + VK_IMAGE_LAYOUT_UNDEFINED, GetLoadOpForTexture(bind_ds), VK_ATTACHMENT_STORE_OP_STORE}; + if (di.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + di.clearValue.depthStencil.depth = bind_ds->GetClearDepth(); + } + + const GSVector4i render_area = GSVector4i::loadh(rtsize); + const VkRenderingInfoKHR ri = {VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0, + {{render_area.x, render_area.y}, + {static_cast(render_area.width()), static_cast(render_area.height())}}, + 1, 0, 0, nullptr, bind_ds ? &di : nullptr, nullptr}; + + vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri); + m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS; + m_current_render_pass_area = render_area; + m_current_render_target = nullptr; + m_current_depth_target = bind_ds; } + + // Have to set viewport here, because it's not done in OMSetRenderTargets(). 
+ const VkViewport vp{0.0f, 0.0f, static_cast(rtsize.x), static_cast(rtsize.y), 0.0f, 1.0f}; + SetViewport(vp); + SetScissor(config.scissor); } if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 6fff081482693..db2661175263a 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -43,6 +43,8 @@ class GSDeviceVK final : public GSDevice bool vk_ext_line_rasterization : 1; bool vk_khr_driver_properties : 1; bool vk_khr_shader_non_semantic_info : 1; + bool vk_ext_fragment_shader_interlock : 1; + bool vk_khr_dynamic_rendering : 1; }; // Global state accessors @@ -349,6 +351,7 @@ class GSDeviceVK final : public GSDevice __fi bool IsRTFeedbackLoop() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadAndWriteRT) != 0); } __fi bool IsTestingAndSamplingDepth() const { return ((feedback_loop_flags & FeedbackLoopFlag_ReadDS) != 0); } + __fi bool IsROV() const { return ps.rov; } }; static_assert(sizeof(PipelineSelector) == 24, "Pipeline selector is 24 bytes"); @@ -384,7 +387,9 @@ class GSDeviceVK final : public GSDevice TFX_TEXTURE_RT, TFX_TEXTURE_PRIMID, - NUM_TFX_TEXTURES + NUM_TFX_TEXTURES, + + TFX_TEXTURE_DS = TFX_TEXTURE_PRIMID, // Reuse primid slot, we're not going to use primid date with ROV. 
}; private: @@ -395,7 +400,9 @@ class GSDeviceVK final : public GSDevice VkDescriptorSetLayout m_tfx_ubo_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_tfx_texture_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_tfx_rov_ds_layout = VK_NULL_HANDLE; VkPipelineLayout m_tfx_pipeline_layout = VK_NULL_HANDLE; + VkPipelineLayout m_tfx_rov_pipeline_layout = VK_NULL_HANDLE; VKStreamBuffer m_vertex_stream_buffer; VKStreamBuffer m_index_stream_buffer; @@ -419,6 +426,7 @@ class GSDeviceVK final : public GSDevice VkPipeline m_hdr_finish_pipelines[2][2] = {}; // [depth][feedback_loop] VkRenderPass m_date_image_setup_render_passes[2][2] = {}; // [depth][clear] VkPipeline m_date_image_setup_pipelines[2][4] = {}; // [depth][datm] + VkPipeline m_rov_depth_begin_pipeline = {}; VkPipeline m_fxaa_pipeline = {}; VkPipeline m_shadeboost_pipeline = {}; @@ -606,13 +614,6 @@ class GSDeviceVK final : public GSDevice void ExecuteCommandBufferAndRestartRenderPass(bool wait_for_completion, const char* reason); void ExecuteCommandBufferForReadback(); - /// Set dirty flags on everything to force re-bind at next draw time. - void InvalidateCachedState(); - - /// Binds all dirty state to the command buffer. - bool ApplyUtilityState(bool already_execed = false); - bool ApplyTFXState(bool already_execed = false); - void SetIndexBuffer(VkBuffer buffer); void SetBlendConstants(u8 color); void SetLineWidth(float width); @@ -624,7 +625,8 @@ class GSDeviceVK final : public GSDevice // Ends a render pass if we're currently in one. // When Bind() is next called, the pass will be restarted. // Calling this function is allowed even if a pass has not begun. 
- bool InRenderPass(); + bool InRenderPass() { return (m_current_render_pass != VK_NULL_HANDLE); } + bool InDynamicRenderPass() const { return (m_current_render_pass == DYNAMIC_RENDERING_RENDER_PASS); } void BeginRenderPass(VkRenderPass rp, const GSVector4i& rect); void BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, const VkClearValue* cv, u32 cv_count); void BeginClearRenderPass(VkRenderPass rp, const GSVector4i& rect, u32 clear_color); @@ -670,15 +672,25 @@ class GSDeviceVK final : public GSDevice { Undefined, TFX, + ROVTFX, Utility }; + static const VkRenderPass DYNAMIC_RENDERING_RENDER_PASS; + void InitializeState(); bool CreatePersistentDescriptorSets(); + /// Set dirty flags on everything to force re-bind at next draw time. + void InvalidateCachedState(); + void SetInitialState(VkCommandBuffer cmdbuf); void ApplyBaseState(u32 flags, VkCommandBuffer cmdbuf); + /// Binds all dirty state to the command buffer. + bool ApplyUtilityState(bool already_execed = false); + bool ApplyTFXState(PipelineLayout layout, bool already_execed = false); + // Which bindings/state has to be updated before the next draw. 
u32 m_dirty_flags = 0; FeedbackLoopFlag m_current_framebuffer_feedback_loop = FeedbackLoopFlag_None; @@ -703,6 +715,7 @@ class GSDeviceVK final : public GSDevice VkDescriptorSet m_tfx_ubo_descriptor_set = VK_NULL_HANDLE; VkDescriptorSet m_tfx_texture_descriptor_set = VK_NULL_HANDLE; VkDescriptorSet m_tfx_rt_descriptor_set = VK_NULL_HANDLE; + VkDescriptorSet m_tfx_image_descriptor_set = VK_NULL_HANDLE; std::array m_tfx_dynamic_offsets{}; const GSTextureVK* m_utility_texture = nullptr; diff --git a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp index 678c1c8c52a55..5bd2cf6809f0d 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp @@ -90,9 +90,12 @@ std::unique_ptr GSTextureVK::Create(Type type, Format format, int w case Type::RenderTarget: { + const bool storage = (g_gs_device->Features().raster_order_view && + (format == Format::Color || format == Format::ColorDepth)); + pxAssert(levels == 1); ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | (storage ? 
VK_IMAGE_USAGE_STORAGE_BIT : 0); } break; diff --git a/pcsx2/GS/Renderers/Vulkan/VKBuilders.cpp b/pcsx2/GS/Renderers/Vulkan/VKBuilders.cpp index 01e7ff750fc29..4c166163d9ef3 100644 --- a/pcsx2/GS/Renderers/Vulkan/VKBuilders.cpp +++ b/pcsx2/GS/Renderers/Vulkan/VKBuilders.cpp @@ -264,6 +264,9 @@ void Vulkan::GraphicsPipelineBuilder::Clear() m_line_rasterization_state = {}; m_line_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT; + m_rendering = {}; + m_rendering.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR; + // set defaults SetNoCullRasterizationState(); SetNoDepthTestState(); @@ -571,6 +574,29 @@ void Vulkan::GraphicsPipelineBuilder::SetProvokingVertex(VkProvokingVertexModeEX m_provoking_vertex.provokingVertexMode = mode; } +void Vulkan::GraphicsPipelineBuilder::SetDynamicRendering() +{ + AddPointerToChain(&m_ci, &m_rendering); +} + +void Vulkan::GraphicsPipelineBuilder::AddDynamicRenderingColorAttachment(VkFormat format) +{ + SetDynamicRendering(); + + pxAssert(m_rendering.colorAttachmentCount < MAX_ATTACHMENTS); + m_rendering_color_formats[m_rendering.colorAttachmentCount++] = format; + + m_rendering.pColorAttachmentFormats = m_rendering_color_formats.data(); +} + +void Vulkan::GraphicsPipelineBuilder::SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format) +{ + SetDynamicRendering(); + + m_rendering.depthAttachmentFormat = depth_format; + m_rendering.stencilAttachmentFormat = stencil_format; +} + Vulkan::ComputePipelineBuilder::ComputePipelineBuilder() { Clear(); @@ -728,7 +754,8 @@ void Vulkan::DescriptorSetUpdateBuilder::PushUpdate( } void Vulkan::DescriptorSetUpdateBuilder::AddImageDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, - VkImageLayout layout /*= VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/) + VkDescriptorType type /* = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE */, + VkImageLayout layout /* = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL */) { 
pxAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS); @@ -742,7 +769,7 @@ void Vulkan::DescriptorSetUpdateBuilder::AddImageDescriptorWrite(VkDescriptorSet dw.dstSet = set; dw.dstBinding = binding; dw.descriptorCount = 1; - dw.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + dw.descriptorType = type; dw.pImageInfo = &ii; } @@ -883,25 +910,6 @@ void Vulkan::DescriptorSetUpdateBuilder::AddInputAttachmentDescriptorWrite( ii.sampler = VK_NULL_HANDLE; } -void Vulkan::DescriptorSetUpdateBuilder::AddStorageImageDescriptorWrite( - VkDescriptorSet set, u32 binding, VkImageView view, VkImageLayout layout /*= VK_IMAGE_LAYOUT_GENERAL*/) -{ - pxAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS); - - VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; - dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - dw.dstSet = set; - dw.dstBinding = binding; - dw.descriptorCount = 1; - dw.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - dw.pImageInfo = &m_image_infos[m_num_image_infos]; - - VkDescriptorImageInfo& ii = m_image_infos[m_num_image_infos++]; - ii.imageView = view; - ii.imageLayout = layout; - ii.sampler = VK_NULL_HANDLE; -} - Vulkan::FramebufferBuilder::FramebufferBuilder() { Clear(); diff --git a/pcsx2/GS/Renderers/Vulkan/VKBuilders.h b/pcsx2/GS/Renderers/Vulkan/VKBuilders.h index bc75f29690129..4fac345ff66fb 100644 --- a/pcsx2/GS/Renderers/Vulkan/VKBuilders.h +++ b/pcsx2/GS/Renderers/Vulkan/VKBuilders.h @@ -141,6 +141,10 @@ namespace Vulkan void SetProvokingVertex(VkProvokingVertexModeEXT mode); + void SetDynamicRendering(); + void AddDynamicRenderingColorAttachment(VkFormat format); + void SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format); + private: VkGraphicsPipelineCreateInfo m_ci; std::array m_shader_stages; @@ -168,6 +172,9 @@ namespace Vulkan VkPipelineRasterizationProvokingVertexStateCreateInfoEXT m_provoking_vertex; VkPipelineRasterizationLineStateCreateInfoEXT 
m_line_rasterization_state; + + VkPipelineRenderingCreateInfoKHR m_rendering; + std::array m_rendering_color_formats; }; class ComputePipelineBuilder @@ -241,6 +248,7 @@ namespace Vulkan bool clear = true); void AddImageDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, + VkDescriptorType type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler); void AddSamplerDescriptorWrites(VkDescriptorSet set, u32 binding, const VkSampler* samplers, u32 num_samplers); @@ -253,8 +261,6 @@ namespace Vulkan void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view); void AddInputAttachmentDescriptorWrite( VkDescriptorSet set, u32 binding, VkImageView view, VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL); - void AddStorageImageDescriptorWrite( - VkDescriptorSet set, u32 binding, VkImageView view, VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL); private: std::array m_writes; diff --git a/pcsx2/GS/Renderers/Vulkan/VKEntryPoints.inl b/pcsx2/GS/Renderers/Vulkan/VKEntryPoints.inl index afe5263f7d4bb..ea231e9658580 100644 --- a/pcsx2/GS/Renderers/Vulkan/VKEntryPoints.inl +++ b/pcsx2/GS/Renderers/Vulkan/VKEntryPoints.inl @@ -238,4 +238,14 @@ VULKAN_DEVICE_ENTRY_POINT(vkGetCalibratedTimestampsEXT, false) // VK_KHR_push_descriptor VULKAN_DEVICE_ENTRY_POINT(vkCmdPushDescriptorSetKHR, false) +// VK_KHR_create_renderpass2 +VULKAN_DEVICE_ENTRY_POINT(vkCmdBeginRenderPass2KHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkCmdEndRenderPass2KHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkCmdNextSubpass2KHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkCreateRenderPass2KHR, false) + +// VK_KHR_dynamic_rendering +VULKAN_DEVICE_ENTRY_POINT(vkCmdBeginRenderingKHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkCmdEndRenderingKHR, false) + #endif // VULKAN_DEVICE_ENTRY_POINT diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 
228f7ae9a032c..e364da55c6955 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -739,6 +739,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(UserHacks_TextureInsideRt) && OpEqu(UserHacks_BilinearHack) && OpEqu(OverrideTextureBarriers) && + OpEqu(OverrideRasterizerOrderViews) && OpEqu(CAS_Sharpness) && OpEqu(ShadeBoost_Brightness) && @@ -785,6 +786,7 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons OpEqu(DisableVertexShaderExpand) && OpEqu(DisableThreadedPresentation) && OpEqu(OverrideTextureBarriers) && + OpEqu(OverrideRasterizerOrderViews) && OpEqu(ExclusiveFullscreenControl); } @@ -929,6 +931,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap) GSSettingIntEnumEx(UserHacks_GPUTargetCLUTMode, "UserHacks_GPUTargetCLUTMode"); GSSettingIntEnumEx(TriFilter, "TriFilter"); GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers"); + GSSettingIntEx(OverrideRasterizerOrderViews, "OverrideRasterizerOrderViews"); GSSettingInt(ShadeBoost_Brightness); GSSettingInt(ShadeBoost_Contrast);