From 66ad59db9227be3380b831cdfd959fd1d9a619b1 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 22 Nov 2024 19:44:49 +0100 Subject: [PATCH 1/4] implement state 5 through a draw call --- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 48 ++++++++++++--- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 4 ++ .../Renderer/Metal/UtilityShaderSource.h | 59 +++++++++---------- 3 files changed, 74 insertions(+), 37 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 4a6c99539..cfb63fc5a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -21,6 +21,9 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Metal/MTLPixelFormat.hpp" +#include "Metal/MTLRenderCommandEncoder.hpp" +#include "Metal/MTLRenderPipeline.hpp" #include "config/CemuConfig.h" #include "gui/guiWrapper.h" @@ -125,7 +128,26 @@ MetalRenderer::MetalRenderer() if (error) { cemuLog_log(LogType::Force, "failed to create utility library (error: {})", error->localizedDescription()->utf8String()); - return; + } + + // Pipelines + MTL::Function* vertexFullscreenFunction = utilityLibrary->newFunction(ToNSString("vertexFullscreen")); + MTL::Function* fragmentCopyDepthToColorFunction = utilityLibrary->newFunction(ToNSString("fragmentCopyDepthToColor")); + + MTL::RenderPipelineDescriptor* rpd = MTL::RenderPipelineDescriptor::alloc()->init(); + rpd->setVertexFunction(vertexFullscreenFunction); + rpd->setFragmentFunction(fragmentCopyDepthToColorFunction); + // TODO: don't hardcode the format + rpd->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatR16Unorm); + + vertexFullscreenFunction->release(); + fragmentCopyDepthToColorFunction->release(); + + error = nullptr; + m_copyDepthToColorPipeline = m_device->newRenderPipelineState(rpd, &error); + if (error) + { + cemuLog_log(LogType::Force, "failed to create copy depth to color pipeline (error: {})", error->localizedDescription()->utf8String()); } // Void vertex pipelines @@ -142,8 +164,7 @@ MetalRenderer::~MetalRenderer() //delete m_copyTextureToTexturePipeline; //delete m_restrideBufferPipeline; - //m_presentPipelineLinear->release(); - //m_presentPipelineSRGB->release(); + m_copyDepthToColorPipeline->release(); delete m_outputShaderCache; delete m_pipelineCache; @@ -1348,14 +1369,27 @@ void MetalRenderer::draw_handleSpecialState5() LatteTextureView* colorBuffer = LatteMRT::GetColorAttachment(0); LatteTextureView* depthBuffer = LatteMRT::GetDepthAttachment(); + auto mtlDepthTexture = static_cast(depthBuffer)->GetRGBAView(); sint32 vpWidth, vpHeight; LatteMRT::GetVirtualViewportDimensions(vpWidth, vpHeight); - surfaceCopy_copySurfaceWithFormatConversion( - depthBuffer->baseTexture, depthBuffer->firstMip, depthBuffer->firstSlice, - colorBuffer->baseTexture, colorBuffer->firstMip, colorBuffer->firstSlice, - vpWidth, vpHeight); + // Sadly, we need to end encoding to ensure that the depth data is up-to-date + + // Copy depth to color + auto renderCommandEncoder = GetRenderCommandEncoder(); + + auto& encoderState = m_state.m_encoderState; + + renderCommandEncoder->setRenderPipelineState(m_copyDepthToColorPipeline); + // TODO: make a helper function for this + encoderState.m_renderPipelineState = m_copyDepthToColorPipeline; + SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_FRAGMENT, mtlDepthTexture, GET_HELPER_TEXTURE_BINDING(0)); + // TODO: make a helper function for this + renderCommandEncoder->setFragmentBytes(&vpWidth, sizeof(sint32), GET_HELPER_BUFFER_BINDING(0)); + encoderState.m_buffers[METAL_SHADER_TYPE_FRAGMENT][GET_HELPER_BUFFER_BINDING(0)] = {nullptr}; + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 2f8514892..6d5bea6c6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -6,6 +6,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" +#include "Metal/MTLRenderPipeline.hpp" #include struct MetalBufferAllocation @@ -471,6 +472,9 @@ class MetalRenderer : public Renderer class MetalDepthStencilCache* m_depthStencilCache; class MetalSamplerCache* m_samplerCache; + // Pipelines + MTL::RenderPipelineState* m_copyDepthToColorPipeline; + // Void vertex pipelines class MetalVoidVertexPipeline* m_copyBufferToBufferPipeline; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h index 9fba19467..2041f4f88 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h @@ -8,24 +8,24 @@ using namespace metal; #define GET_BUFFER_BINDING(index) (28 + index) #define GET_TEXTURE_BINDING(index) (29 + index) -#define GET_SAMPLER_BINDING(index) (14 + index)\n +#define GET_SAMPLER_BINDING(index) (14 + index) + +constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; + +struct VertexOut { + float4 position [[position]]; + float2 texCoord; +}; + +vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) { + VertexOut out; + out.position = float4(positions[vid], 0.0, 1.0); + out.texCoord = positions[vid] * 0.5 + 0.5; + out.texCoord.y = 1.0 - out.texCoord.y; + + return out; +} -//constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; -// -//struct VertexOut { -// float4 position [[position]]; -// float2 texCoord; -//}; -// -//vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) { -// VertexOut out; -// out.position = float4(positions[vid], 0.0, 1.0); -// out.texCoord = positions[vid] * 0.5 + 0.5; -// out.texCoord.y = 1.0 - out.texCoord.y; -// -// return out; -//} -// //fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d tex [[texture(0)]], //sampler samplr [[sampler(0)]]) { // return tex.sample(samplr, in.texCoord); //} @@ -34,19 +34,18 @@ vertex void vertexCopyBufferToBuffer(uint vid [[vertex_id]], device uint8_t* src dst[vid] = src[vid]; } -//vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d src [[texture(GET_TEXTURE_BINDING(0))]], texture2d dst [[texture(GET_TEXTURE_BINDING(1))]], constant uint32_t& width [[buffer(GET_BUFFER_BINDING(0))]]) { -// uint2 coord = uint2(vid % width, vid / width); -// return dst.write(float4(src.read(coord).r, 0.0, 0.0, 0.0), coord); -//} +fragment float4 fragmentCopyDepthToColor(VertexOut in [[stage_in]], texture2d src [[texture(GET_TEXTURE_BINDING(0))]]) { + return float4(src.read(uint2(in.position.xy)).r, 0.0, 0.0, 0.0); +} -struct RestrideParams { - uint oldStride; - uint newStride; -}; +//struct RestrideParams { +// uint oldStride; +// uint newStride; +//}; -vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) { - for (uint32_t i = 0; i < params.oldStride; i++) { - dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; - } -} +//vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer//(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant //RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) { +// for (uint32_t i = 0; i < params.oldStride; i++) { +// dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; +// } +//} )"; From 00857b233b6c92dc3ecb5c4b0341d960f4a1665a Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 22 Nov 2024 20:03:41 +0100 Subject: [PATCH 2/4] support arbitrary pixel formats for state 5 --- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 45 +++++++++++-------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 3 +- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index cfb63fc5a..a29a23568 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -134,22 +134,12 @@ MetalRenderer::MetalRenderer() MTL::Function* vertexFullscreenFunction = utilityLibrary->newFunction(ToNSString("vertexFullscreen")); MTL::Function* fragmentCopyDepthToColorFunction = utilityLibrary->newFunction(ToNSString("fragmentCopyDepthToColor")); - MTL::RenderPipelineDescriptor* rpd = MTL::RenderPipelineDescriptor::alloc()->init(); - rpd->setVertexFunction(vertexFullscreenFunction); - rpd->setFragmentFunction(fragmentCopyDepthToColorFunction); - // TODO: don't hardcode the format - rpd->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatR16Unorm); - + m_copyDepthToColorDesc = MTL::RenderPipelineDescriptor::alloc()->init(); + m_copyDepthToColorDesc->setVertexFunction(vertexFullscreenFunction); + m_copyDepthToColorDesc->setFragmentFunction(fragmentCopyDepthToColorFunction); vertexFullscreenFunction->release(); fragmentCopyDepthToColorFunction->release(); - error = nullptr; - m_copyDepthToColorPipeline = m_device->newRenderPipelineState(rpd, &error); - if (error) - { - cemuLog_log(LogType::Force, "failed to create copy depth to color pipeline (error: {})", error->localizedDescription()->utf8String()); - } - // Void vertex pipelines if (m_isAppleGPU) m_copyBufferToBufferPipeline = new MetalVoidVertexPipeline(this, utilityLibrary, "vertexCopyBufferToBuffer"); @@ -164,7 +154,9 @@ MetalRenderer::~MetalRenderer() //delete m_copyTextureToTexturePipeline; //delete m_restrideBufferPipeline; - m_copyDepthToColorPipeline->release(); + m_copyDepthToColorDesc->release(); + for (const auto [pixelFormat, pipeline] : m_copyDepthToColorPipelines) + pipeline->release(); delete m_outputShaderCache; delete m_pipelineCache; @@ -1369,22 +1361,39 @@ void MetalRenderer::draw_handleSpecialState5() LatteTextureView* colorBuffer = LatteMRT::GetColorAttachment(0); LatteTextureView* depthBuffer = LatteMRT::GetDepthAttachment(); - auto mtlDepthTexture = static_cast(depthBuffer)->GetRGBAView(); + auto colorTextureMtl = static_cast(colorBuffer); + auto depthTextureMtl = static_cast(depthBuffer); sint32 vpWidth, vpHeight; LatteMRT::GetVirtualViewportDimensions(vpWidth, vpHeight); + // Get the pipeline + MTL::PixelFormat colorPixelFormat = colorTextureMtl->GetRGBAView()->pixelFormat(); + auto& pipeline = m_copyDepthToColorPipelines[colorPixelFormat]; + if (!pipeline) + { + m_copyDepthToColorDesc->colorAttachments()->object(0)->setPixelFormat(colorPixelFormat); + + NS::Error* error = nullptr; + pipeline = m_device->newRenderPipelineState(m_copyDepthToColorDesc, &error); + if (error) + { + cemuLog_log(LogType::Force, "failed to create copy depth to color pipeline (error: {})", error->localizedDescription()->utf8String()); + } + } + // Sadly, we need to end encoding to ensure that the depth data is up-to-date + EndEncoding(); // Copy depth to color auto renderCommandEncoder = GetRenderCommandEncoder(); auto& encoderState = m_state.m_encoderState; - renderCommandEncoder->setRenderPipelineState(m_copyDepthToColorPipeline); + renderCommandEncoder->setRenderPipelineState(pipeline); // TODO: make a helper function for this - encoderState.m_renderPipelineState = m_copyDepthToColorPipeline; - SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_FRAGMENT, mtlDepthTexture, GET_HELPER_TEXTURE_BINDING(0)); + encoderState.m_renderPipelineState = pipeline; + SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_FRAGMENT, depthTextureMtl->GetRGBAView(), GET_HELPER_TEXTURE_BINDING(0)); // TODO: make a helper function for this renderCommandEncoder->setFragmentBytes(&vpWidth, sizeof(sint32), GET_HELPER_BUFFER_BINDING(0)); encoderState.m_buffers[METAL_SHADER_TYPE_FRAGMENT][GET_HELPER_BUFFER_BINDING(0)] = {nullptr}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 6d5bea6c6..010f3f922 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -473,7 +473,8 @@ class MetalRenderer : public Renderer class MetalSamplerCache* m_samplerCache; // Pipelines - MTL::RenderPipelineState* m_copyDepthToColorPipeline; + MTL::RenderPipelineDescriptor* m_copyDepthToColorDesc; + std::map m_copyDepthToColorPipelines; // Void vertex pipelines class MetalVoidVertexPipeline* m_copyBufferToBufferPipeline; From 14258cdb284215ef1031b37507e6726a44ad24d6 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 23 Nov 2024 08:39:50 +0100 Subject: [PATCH 3/4] Revert "only set array length for texture view arrays" This reverts commit d9f857bcc4588e02b5dd7980299da9cceea288cf. --- .../Renderer/Metal/LatteTextureViewMtl.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp index e77e47156..5374126ac 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp @@ -2,7 +2,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Metal/MTLTexture.hpp" uint32 LatteTextureMtl_AdjustTextureCompSel(Latte::E_GX2SURFFMT format, uint32 compSel) { @@ -159,21 +158,21 @@ MTL::Texture* LatteTextureViewMtl::CreateSwizzledView(uint32 gpuSamplerSwizzle) uint32 baseLevel = firstMip; uint32 levelCount = this->numMip; - uint32 baseLayer = 0; - uint32 layerCount = 1; - - // TODO: check if base texture is 3D texture as well? + uint32 baseLayer; + uint32 layerCount; + // TODO: check if base texture is 3D texture as well if (textureType == MTL::TextureType3D) { cemu_assert_debug(firstMip == 0); cemu_assert_debug(this->numSlice == baseTexture->depth); + baseLayer = 0; + layerCount = 1; } - // Cube array needs to have layer count multiple of 6 as opposed to when creating a texture - else if (textureType == MTL::TextureTypeCubeArray || textureType == MTL::TextureType2DArray) - { - baseLayer = firstSlice; + else + { + baseLayer = firstSlice; layerCount = this->numSlice; - } + } MTL::TextureSwizzleChannels swizzle; swizzle.red = GetMtlTextureSwizzle(compSelR); From 009dab8a280441baada89999a241c36d4954df57 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 23 Nov 2024 08:41:00 +0100 Subject: [PATCH 4/4] remove useless includes --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 3 --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h | 1 - 2 files changed, 4 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index a29a23568..890295127 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -21,9 +21,6 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h" -#include "Metal/MTLPixelFormat.hpp" -#include "Metal/MTLRenderCommandEncoder.hpp" -#include "Metal/MTLRenderPipeline.hpp" #include "config/CemuConfig.h" #include "gui/guiWrapper.h" diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 010f3f922..3f508ae8c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -6,7 +6,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" -#include "Metal/MTLRenderPipeline.hpp" #include struct MetalBufferAllocation