From 8b783e63dc8bbd23cc5eb5fd0033295e10175869 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sun, 13 Oct 2024 09:20:45 +0200 Subject: [PATCH 01/20] refactor pipeline cache --- src/Cafe/CMakeLists.txt | 2 + src/Cafe/HW/Latte/Core/LatteShaderCache.cpp | 21 - .../Renderer/Metal/MetalPipelineCache.cpp | 587 ++--------------- .../Latte/Renderer/Metal/MetalPipelineCache.h | 22 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 596 ++++++++++++++++++ .../Renderer/Metal/MetalPipelineCompiler.h | 38 ++ .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 7 +- 7 files changed, 680 insertions(+), 593 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 9b5f8d3ed..3d1a02305 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -560,6 +560,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/MetalMemoryManager.h HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp HW/Latte/Renderer/Metal/MetalOutputShaderCache.h + HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp + HW/Latte/Renderer/Metal/MetalPipelineCompiler.h HW/Latte/Renderer/Metal/MetalPipelineCache.cpp HW/Latte/Renderer/Metal/MetalPipelineCache.h HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index 4659ff10a..cdb41184e 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -160,18 +160,11 @@ bool LoadTGAFile(const std::vector& buffer, TGAFILE *tgaFile) void LatteShaderCache_finish() { if (g_renderer->GetType() == RendererAPI::Vulkan) - { RendererShaderVk::ShaderCacheLoading_end(); - } else if (g_renderer->GetType() == RendererAPI::OpenGL) - { RendererShaderGL::ShaderCacheLoading_end(); - } else if (g_renderer->GetType() == RendererAPI::Metal) - { 
RendererShaderMtl::ShaderCacheLoading_end(); - MetalPipelineCache::ShaderCacheLoading_end(); - } } uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId) @@ -251,18 +244,11 @@ void LatteShaderCache_Load() fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec); // initialize renderer specific caches if (g_renderer->GetType() == RendererAPI::Vulkan) - { RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId); - } else if (g_renderer->GetType() == RendererAPI::OpenGL) - { RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId); - } else if (g_renderer->GetType() == RendererAPI::Metal) - { RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId); - MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId); - } // get cache file name const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId); const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0 @@ -791,18 +777,11 @@ void LatteShaderCache_Close() s_shaderCacheGeneric = nullptr; } if (g_renderer->GetType() == RendererAPI::Vulkan) - { RendererShaderVk::ShaderCacheLoading_Close(); - } else if (g_renderer->GetType() == RendererAPI::OpenGL) - { RendererShaderGL::ShaderCacheLoading_Close(); - } else if (g_renderer->GetType() == RendererAPI::Metal) - { RendererShaderMtl::ShaderCacheLoading_Close(); - MetalPipelineCache::ShaderCacheLoading_Close(); - } // if Vulkan then also close pipeline cache if (g_renderer->GetType() == RendererAPI::Vulkan) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 29459539f..a70f75418 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,517 +1,13 @@ -#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include 
"Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" -#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" -#include "Cemu/Logging/CemuLogging.h" -#include "HW/Latte/Core/LatteConst.h" -#include "config/ActiveSettings.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" -static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) -{ - auto parameterMask = vertexShader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - // make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) - continue; - gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId)); - } - gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx)); - gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx)); -} - -static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) -{ - auto parameterMask = vertexShader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = 
psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - // make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) - continue; - gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); - } - gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant)); - gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n")); -} - -static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) -{ - sint32 pList[4] = { p0, p1, p2, p3 }; - for (sint32 i = 0; i < 4; i++) - { - if (pList[i] == 3) - rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); - else - rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); - } - gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0])); - gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1])); - gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2])); - gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1])); - gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2])); - gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3])); -} - -static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister) -{ - std::string gsSrc; - gsSrc.append("#include \r\n"); - 
gsSrc.append("using namespace metal;\r\n"); - - LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); - - // inputs & outputs - std::string vertexOutDefinition = "struct VertexOut {\r\n"; - vertexOutDefinition += "float4 position;\r\n"; - std::string geometryOutDefinition = "struct GeometryOut {\r\n"; - geometryOutDefinition += "float4 position [[position]];\r\n"; - auto parameterMask = vertexShader->outputParameterMask; - for (sint32 f = 0; f < 2; f++) - { - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId); - if (psImport == nullptr) - continue; - - if (f == 0) - { - vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); - } - else - { - geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); - - geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)); - if (psImport->isFlat) - geometryOutDefinition += " [[flat]]"; - if (psImport->isNoPerspective) - geometryOutDefinition += " [[center_no_perspective]]"; - geometryOutDefinition += ";\r\n"; - } - } - } - vertexOutDefinition += "};\r\n"; - geometryOutDefinition += "};\r\n"; - - gsSrc.append(vertexOutDefinition); - gsSrc.append(geometryOutDefinition); - - gsSrc.append("struct ObjectPayload {\r\n"); - gsSrc.append("VertexOut vertexOut[3];\r\n"); - gsSrc.append("};\r\n"); - - // gen function - gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("return b - (c - a);\r\n"); - gsSrc.append("}\r\n"); - - gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("return c - (b - a);\r\n"); - gsSrc.append("}\r\n"); - - gsSrc.append("float4 
gen4thVertexC(float4 a, float4 b, float4 c)\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("return c + (b - a);\r\n"); - gsSrc.append("}\r\n"); - - // main - gsSrc.append("using MeshType = mesh;\r\n"); - gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n"); - gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("GeometryOut out;\r\n"); - - // there are two possible winding orders that need different triangle generation: - // 0 1 - // 2 3 - // and - // 0 1 - // 3 2 - // all others are just symmetries of these cases - - // we can determine the case by comparing the distance 0<->1 and 0<->2 - - gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); - gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); - gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n"); - - // emit vertices - gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n"); - gsSrc.append("{\r\n"); - // p0 to p1 is diagonal - rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister); - gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n"); - // p0 to p2 is diagonal - rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister); - gsSrc.append("} else {\r\n"); - // p1 to p2 is diagonal - rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister); - gsSrc.append("}\r\n"); - - gsSrc.append("mesh.set_primitive_count(2);\r\n"); - - gsSrc.append("}\r\n"); - - auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); - mtlShader->PreponeCompilation(true); - - return mtlShader; 
-} - -#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF - -uint64 s_cacheTitleId = INVALID_TITLE_ID; - -extern std::atomic_int g_compiled_shaders_total; -extern std::atomic_int g_compiled_shaders_async; - -template -void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) -{ - // Rasterization - bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - - // HACK - // TODO: include this in the hash? - if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; - - // Culling both front and back faces effectively disables rasterization - const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; - - auto pixelShaderMtl = static_cast(pixelShader->shader); - - if (!rasterizationEnabled || !pixelShaderMtl) - { - desc->setRasterizationEnabled(false); - return; - } - - desc->setFragmentFunction(pixelShaderMtl->GetFunction()); - - // Color attachments - const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; - uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); - uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); - for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) - { - const auto& colorBuffer = lastUsedFBO->colorBuffer[i]; - auto texture = static_cast(colorBuffer.texture); - if (!texture) - { - continue; - } - auto colorAttachment = desc->colorAttachments()->object(i); - colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); - - // Disable writes if not in the active FBO - if (!activeFBO->colorBuffer[i].texture) - { - colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); - continue; - } - - colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); - - // Blending - bool blendEnabled = 
((blendEnableMask & (1 << i))) != 0; - // Only float data type is blendable - if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) - { - colorAttachment->setBlendingEnabled(true); - - const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i]; - - auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); - auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); - auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); - - colorAttachment->setRgbBlendOperation(rgbBlendOp); - colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); - if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) - { - colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); - colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); - colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); - } - else - { - colorAttachment->setAlphaBlendOperation(rgbBlendOp); - colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); - } - } - } - - // Depth stencil attachment - if (lastUsedFBO->depthBuffer.texture) - { - auto texture = static_cast(lastUsedFBO->depthBuffer.texture); - desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - if (lastUsedFBO->depthBuffer.hasStencil) - { - desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - } - } -} - -void MetalPipelineCache::ShaderCacheLoading_begin(uint64 cacheTitleId) -{ - s_cacheTitleId = cacheTitleId; -} - -void MetalPipelineCache::ShaderCacheLoading_end() -{ -} - -void MetalPipelineCache::ShaderCacheLoading_Close() -{ - g_compiled_shaders_total = 0; - g_compiled_shaders_async = 0; -} - -MetalPipelineCache::~MetalPipelineCache() -{ - 
for (auto& pair : m_pipelineCache) - { - pair.second->release(); - } - m_pipelineCache.clear(); - - NS::Error* error = nullptr; - m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error); - if (error) - { - cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String()); - error->release(); - } - m_binaryArchive->release(); - - m_binaryArchiveURL->release(); -} - -MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) -{ - uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, lastUsedFBO, lcr); - auto& pipeline = m_pipelineCache[stateHash]; - if (pipeline) - return pipeline; - - auto vertexShaderMtl = static_cast(vertexShader->shader); - - // Render pipeline state - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexShaderMtl->GetFunction()); - - // Vertex descriptor - if (!fetchShader->mtlFetchVertexManually) - { - MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - uint32 minBufferStride = 0; - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? 
- - auto attribute = vertexDescriptor->attributes()->object(semanticId); - attribute->setOffset(attr.offset); - attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); - attribute->setFormat(GetMtlVertexFormat(attr.format)); - - minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format)); - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - if (bufferStride == 0) - { - // Buffer stride cannot be zero, let's use the minimum stride - bufferStride = minBufferStride; - - // Additionally, constant vertex function must be used - layout->setStepFunction(MTL::VertexStepFunctionConstant); - layout->setStepRate(0); - } - else - { - if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerVertex); - else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerInstance); - else - { - debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); - cemu_assert(false); - } - } - bufferStride = Align(bufferStride, 4); - layout->setStride(bufferStride); - } - - // TODO: don't always set the vertex descriptor? 
- desc->setVertexDescriptor(vertexDescriptor); - vertexDescriptor->release(); - } - - SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); - - TryLoadBinaryArchive(); - - // Load binary - if (m_binaryArchive) - { - NS::Object* binArchives[] = {m_binaryArchive}; - auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); - desc->setBinaryArchives(binaryArchives); - binaryArchives->release(); - } - - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Cached render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); - - // Pipeline wasn't found in the binary archive, we need to compile it - if (error) - { - desc->setBinaryArchives(nullptr); - - error->release(); - error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("New render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - else - { - // Save binary - if (m_binaryArchive) - { - NS::Error* error = nullptr; - m_binaryArchive->addRenderPipelineFunctions(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - } - } - desc->release(); - - return pipeline; -} - -MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType) -{ - uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, lastUsedFBO, 
lcr); - - stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; - stateHash = std::rotl(stateHash, 7); - - stateHash += (uint8)hostIndexType; - stateHash = std::rotl(stateHash, 7); - - auto& pipeline = m_pipelineCache[stateHash]; - if (pipeline) - return pipeline; - - auto objectShaderMtl = static_cast(vertexShader->shader); - RendererShaderMtl* meshShaderMtl; - if (geometryShader) - { - meshShaderMtl = static_cast(geometryShader->shader); - } - else - { - // If there is no geometry shader, it means that we are emulating rects - meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); - } - - // Render pipeline state - MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - desc->setObjectFunction(objectShaderMtl->GetFunction()); - desc->setMeshFunction(meshShaderMtl->GetFunction()); - - SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); - - TryLoadBinaryArchive(); - - // Load binary - // TODO: no binary archives? :( - - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Mesh pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); - desc->release(); - if (error) - { - cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - - return pipeline; -} - -uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, const LatteContextRegister& lcr) +uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { 
// Hash uint64 stateHash = 0; @@ -523,6 +19,12 @@ uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* f stateHash += textureView->GetRGBAView()->pixelFormat() + i * 31; stateHash = std::rotl(stateHash, 7); + + if (activeFBO->colorBuffer[i].texture) + { + stateHash += 1; + stateHash = std::rotl(stateHash, 1); + } } if (lastUsedFBO->depthBuffer.texture) @@ -530,6 +32,12 @@ uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* f auto textureView = static_cast(lastUsedFBO->depthBuffer.texture); stateHash += textureView->GetRGBAView()->pixelFormat(); stateHash = std::rotl(stateHash, 7); + + if (activeFBO->depthBuffer.texture) + { + stateHash += 1; + stateHash = std::rotl(stateHash, 1); + } } for (auto& group : fetchShader->bufferGroups) @@ -586,55 +94,38 @@ uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* f } } - return stateHash; -} + // Mesh pipeline + const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); + bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); -void MetalPipelineCache::TryLoadBinaryArchive() -{ - if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) - return; + bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); - // GPU name - const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String(); - std::string deviceName; - deviceName.assign(deviceName1); - - // Replace spaces with underscores - for (auto& c : deviceName) + if (usesGeometryShader) { - if (c == ' ') - c = '_'; + stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; + stateHash = std::rotl(stateHash, 7); } - // OS version - auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion(); - - // Precompiled binaries cannot be shared between different devices or OS versions - const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); - const 
fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, cacheFilename); - - // Create the directory if it doesn't exist - std::filesystem::create_directories(cachePath.parent_path()); + return stateHash; +} - m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); +MetalPipelineCache::~MetalPipelineCache() +{ + for (auto& [key, value] : m_pipelineCache) + { + value->release(); + } +} - MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); - desc->setUrl(m_binaryArchiveURL); +MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + auto& pipeline = m_pipelineCache[CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr)]; + if (pipeline) + return pipeline; - NS::Error* error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - desc->setUrl(nullptr); + MetalPipelineCompiler compiler(m_mtlr); + compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + pipeline = compiler.Compile(false, true); - error->release(); - error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - desc->release(); + return pipeline; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index 
916a90728..18b163f6d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -1,24 +1,17 @@ #pragma once -#include - -#include "HW/Latte/ISA/LatteReg.h" -#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" -#include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" +// TODO: binary archives class MetalPipelineCache { public: - static void ShaderCacheLoading_begin(uint64 cacheTitleId); - static void ShaderCacheLoading_end(); - static void ShaderCacheLoading_Close(); + static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} ~MetalPipelineCache(); - MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); - - MTL::RenderPipelineState* GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType); + MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); // Debug size_t GetPipelineCacheSize() const { 
return m_pipelineCache.size(); } @@ -27,11 +20,4 @@ class MetalPipelineCache class MetalRenderer* m_mtlr; std::map m_pipelineCache; - - NS::URL* m_binaryArchiveURL; - MTL::BinaryArchive* m_binaryArchive; - - uint64 CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, const LatteContextRegister& lcr); - - void TryLoadBinaryArchive(); }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp new file mode 100644 index 000000000..9eb29cb6a --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -0,0 +1,596 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" + +#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/ISA/RegDefines.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" +#include "Cafe/HW/Latte/Core/LatteShader.h" + +static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("out.passParameterSem{} = 
objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId)); + } + gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx)); + gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx)); +} + +static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); + } + gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant)); + gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n")); +} + +static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) +{ + sint32 pList[4] = { p0, p1, p2, p3 }; + for (sint32 i = 0; i < 4; i++) + { + if (pList[i] == 3) + rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); + else + rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); + } + 
gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0])); + gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1])); + gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2])); + gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1])); + gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2])); + gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3])); +} + +static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister) +{ + std::string gsSrc; + gsSrc.append("#include \r\n"); + gsSrc.append("using namespace metal;\r\n"); + + LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); + + // inputs & outputs + std::string vertexOutDefinition = "struct VertexOut {\r\n"; + vertexOutDefinition += "float4 position;\r\n"; + std::string geometryOutDefinition = "struct GeometryOut {\r\n"; + geometryOutDefinition += "float4 position [[position]];\r\n"; + auto parameterMask = vertexShader->outputParameterMask; + for (sint32 f = 0; f < 2; f++) + { + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId); + if (psImport == nullptr) + continue; + + if (f == 0) + { + vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); + } + else + { + geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); + + geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)); + if (psImport->isFlat) + geometryOutDefinition += " [[flat]]"; + if (psImport->isNoPerspective) + geometryOutDefinition += " [[center_no_perspective]]"; + geometryOutDefinition += ";\r\n"; + } + } + } + 
vertexOutDefinition += "};\r\n"; + geometryOutDefinition += "};\r\n"; + + gsSrc.append(vertexOutDefinition); + gsSrc.append(geometryOutDefinition); + + gsSrc.append("struct ObjectPayload {\r\n"); + gsSrc.append("VertexOut vertexOut[3];\r\n"); + gsSrc.append("};\r\n"); + + // gen function + gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return b - (c - a);\r\n"); + gsSrc.append("}\r\n"); + + gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return c - (b - a);\r\n"); + gsSrc.append("}\r\n"); + + gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return c + (b - a);\r\n"); + gsSrc.append("}\r\n"); + + // main + gsSrc.append("using MeshType = mesh;\r\n"); + gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n"); + gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("GeometryOut out;\r\n"); + + // there are two possible winding orders that need different triangle generation: + // 0 1 + // 2 3 + // and + // 0 1 + // 3 2 + // all others are just symmetries of these cases + + // we can determine the case by comparing the distance 0<->1 and 0<->2 + + gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); + gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); + gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n"); + + // emit vertices + gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n"); + gsSrc.append("{\r\n"); + // p0 to p1 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister); + 
gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n"); + // p0 to p2 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister); + gsSrc.append("} else {\r\n"); + // p1 to p2 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister); + gsSrc.append("}\r\n"); + + gsSrc.append("mesh.set_primitive_count(2);\r\n"); + + gsSrc.append("}\r\n"); + + auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); + mtlShader->PreponeCompilation(true); + + return mtlShader; +} + +#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF + +uint64 s_cacheTitleId = INVALID_TITLE_ID; + +extern std::atomic_int g_compiled_shaders_total; +extern std::atomic_int g_compiled_shaders_async; + +template +void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) +{ + // Rasterization + bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + // TODO: include this in the hash? 
+ if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; + + // Culling both front and back faces effectively disables rasterization + const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; + + auto pixelShaderMtl = static_cast(pixelShader->shader); + + if (!rasterizationEnabled || !pixelShaderMtl) + { + desc->setRasterizationEnabled(false); + return; + } + + desc->setFragmentFunction(pixelShaderMtl->GetFunction()); + + // Color attachments + const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; + uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); + uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + const auto& colorBuffer = lastUsedFBO->colorBuffer[i]; + auto texture = static_cast(colorBuffer.texture); + if (!texture) + { + continue; + } + auto colorAttachment = desc->colorAttachments()->object(i); + colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); + + // Disable writes if not in the active FBO + if (!activeFBO->colorBuffer[i].texture) + { + colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); + continue; + } + + colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); + + // Blending + bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; + // Only float data type is blendable + if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) + { + colorAttachment->setBlendingEnabled(true); + + const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i]; + + auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); + auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); + auto dstRgbBlendFactor = 
GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); + + colorAttachment->setRgbBlendOperation(rgbBlendOp); + colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); + colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); + if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) + { + colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); + colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); + colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); + } + else + { + colorAttachment->setAlphaBlendOperation(rgbBlendOp); + colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); + colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); + } + } + } + + // Depth stencil attachment + if (lastUsedFBO->depthBuffer.texture) + { + auto texture = static_cast(lastUsedFBO->depthBuffer.texture); + desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); + if (lastUsedFBO->depthBuffer.hasStencil) + { + desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); + } + } +} + +MetalPipelineCompiler::~MetalPipelineCompiler() +{ + /* + for (auto& pair : m_pipelineCache) + { + pair.second->release(); + } + m_pipelineCache.clear(); + + NS::Error* error = nullptr; + m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error); + if (error) + { + cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String()); + error->release(); + } + m_binaryArchive->release(); + + m_binaryArchiveURL->release(); + */ + m_pipelineDescriptor->release(); +} + +void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& 
lcr) +{ + const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); + bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); + + m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + + if (m_usesGeometryShader) + InitFromStateMesh(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + else + InitFromStateRender(fetchShader, vertexShader, pixelShader, lastUsedFBO, activeFBO, lcr); +} + +MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread) +{ + if (m_usesGeometryShader) + { + auto desc = static_cast(m_pipelineDescriptor); + + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Mesh render pipeline state", desc)); +#endif + MTL::RenderPipelineState* pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + // desc is m_pipelineDescriptor, which the destructor releases; releasing it here as well would over-release it + if (error) + { + cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + + return pipeline; + } + else + { + auto desc = static_cast(m_pipelineDescriptor); + + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Render pipeline state", desc)); +#endif + MTL::RenderPipelineState* pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + if (error) + { + cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + + return pipeline; + } +} + +void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + // Shaders + auto vertexShaderMtl = 
static_cast(vertexShader->shader); + + // Render pipeline state + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexShaderMtl->GetFunction()); + + // Vertex descriptor + if (!fetchShader->mtlFetchVertexManually) + { + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + uint32 minBufferStride = 0; + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + auto attribute = vertexDescriptor->attributes()->object(semanticId); + attribute->setOffset(attr.offset); + attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); + attribute->setFormat(GetMtlVertexFormat(attr.format)); + + minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format)); + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + if (bufferStride == 0) + { + // Buffer stride cannot be zero, let's use the minimum stride + bufferStride = minBufferStride; + + // Additionally, constant vertex function must be used + layout->setStepFunction(MTL::VertexStepFunctionConstant); + layout->setStepRate(0); + } + else + { + if (!fetchType.has_value() || 
fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerVertex); + else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerInstance); + else + { + debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); + cemu_assert(false); + } + } + bufferStride = Align(bufferStride, 4); + layout->setStride(bufferStride); + } + + // TODO: don't always set the vertex descriptor? + desc->setVertexDescriptor(vertexDescriptor); + vertexDescriptor->release(); + } + + SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); + + m_pipelineDescriptor = desc; + + //TryLoadBinaryArchive(); + + // Load binary + /* + if (m_binaryArchive) + { + NS::Object* binArchives[] = {m_binaryArchive}; + auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); + desc->setBinaryArchives(binaryArchives); + binaryArchives->release(); + } + */ + + /* + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Cached render pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); + + // Pipeline wasn't found in the binary archive, we need to compile it + if (error) + { + desc->setBinaryArchives(nullptr); + + error->release(); + error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("New render pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); + if (error) + { + cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + else + { + // Save binary + if (m_binaryArchive) + { + NS::Error* error = nullptr; + m_binaryArchive->addRenderPipelineFunctions(desc, &error); + if (error) + { + cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", 
error->localizedDescription()->utf8String()); + error->release(); + } + } + } + } + desc->release(); + + return pipeline; + */ +} + +void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + auto objectShaderMtl = static_cast(vertexShader->shader); + RendererShaderMtl* meshShaderMtl; + if (geometryShader) + { + meshShaderMtl = static_cast(geometryShader->shader); + } + else + { + // If there is no geometry shader, it means that we are emulating rects + meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); + } + + // Render pipeline state + MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); + desc->setObjectFunction(objectShaderMtl->GetFunction()); + desc->setMeshFunction(meshShaderMtl->GetFunction()); + + SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); + + m_pipelineDescriptor = desc; + + //TryLoadBinaryArchive(); + + // Load binary + // TODO: no binary archives? 
:( + + /* + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Mesh pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + desc->release(); + if (error) + { + cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + + return pipeline; + */ +} + +/* +void MetalPipelineCache::TryLoadBinaryArchive() +{ + if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) + return; + + // GPU name + const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String(); + std::string deviceName; + deviceName.assign(deviceName1); + + // Replace spaces with underscores + for (auto& c : deviceName) + { + if (c == ' ') + c = '_'; + } + + // OS version + auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion(); + + // Precompiled binaries cannot be shared between different devices or OS versions + const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); + const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, cacheFilename); + + // Create the directory if it doesn't exist + std::filesystem::create_directories(cachePath.parent_path()); + + m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); + + MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); + desc->setUrl(m_binaryArchiveURL); + + NS::Error* error = nullptr; + m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); + if (error) + { + desc->setUrl(nullptr); + + error->release(); + error = nullptr; + m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); + if (error) + { + cemuLog_log(LogType::Force, "failed to create binary archive: 
{}", error->localizedDescription()->utf8String()); + error->release(); + } + } + desc->release(); +} +*/ diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h new file mode 100644 index 000000000..282c174d0 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include "Foundation/NSObject.hpp" +#include "HW/Latte/ISA/LatteReg.h" +#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" +#include "Cafe/HW/Latte/Renderer/Renderer.h" + +class MetalPipelineCompiler +{ +public: + MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + ~MetalPipelineCompiler(); + + void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + + MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread); + +private: + class MetalRenderer* m_mtlr; + + bool m_usesGeometryShader; + + /* + std::map m_pipelineCache; + + NS::URL* m_binaryArchiveURL; + MTL::BinaryArchive* m_binaryArchive; + */ + NS::Object* m_pipelineDescriptor; + + void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + + void InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + + //void TryLoadBinaryArchive(); +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 7cd858576..76ed4c551 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1222,12 +1222,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 //} // Render pipeline state - MTL::RenderPipelineState* renderPipelineState; - if (usesGeometryShader) - renderPipelineState = m_pipelineCache->GetMeshPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew, hostIndexType); - else - renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew); - + MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew); if (!renderPipelineState) return; From e9e510d2cd72083f29c432767816a9ed112ced38 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Mon, 14 Oct 2024 20:00:37 +0200 Subject: [PATCH 02/20] add: base for pipeline caching --- .../Renderer/Metal/MetalPipelineCache.cpp | 378 ++++++++++++++++++ .../Latte/Renderer/Metal/MetalPipelineCache.h | 53 ++- 2 files changed, 430 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index a70f75418..ea95c2662 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -6,6 +6,14 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" +#include "Cafe/HW/Latte/Core/LatteCachedFBO.h" +#include "Cafe/HW/Latte/Common/RegisterSerializer.h" +#include "Cafe/HW/Latte/Core/LatteShaderCache.h" +#include 
"Cemu/FileCache/FileCache.h" +#include "HW/Latte/Core/LatteShader.h" +#include "util/helpers/helpers.h" +#include "config/ActiveSettings.h" +#include uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { @@ -129,3 +137,373 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte return pipeline; } + +struct +{ + uint32 pipelineLoadIndex; + uint32 pipelineMaxFileIndex; + + std::atomic_uint32_t pipelinesQueued; + std::atomic_uint32_t pipelinesLoaded; +} g_mtlCacheState; + +uint32 MetalPipelineCache::BeginLoading(uint64 cacheTitleId) +{ + std::error_code ec; + fs::create_directories(ActiveSettings::GetCachePath("shaderCache/transferable"), ec); + const auto pathCacheFile = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_mtlpipeline.bin", cacheTitleId); + + // init cache loader state + g_mtlCacheState.pipelineLoadIndex = 0; + g_mtlCacheState.pipelineMaxFileIndex = 0; + g_mtlCacheState.pipelinesLoaded = 0; + g_mtlCacheState.pipelinesQueued = 0; + + // start async compilation threads + m_compilationCount.store(0); + m_compilationQueue.clear(); + + // get core count + uint32 cpuCoreCount = GetPhysicalCoreCount(); + m_numCompilationThreads = std::clamp(cpuCoreCount, 1u, 8u); + // TODO: uncomment? 
+ //if (VulkanRenderer::GetInstance()->GetDisableMultithreadedCompilation()) + // m_numCompilationThreads = 1; + + for (uint32 i = 0; i < m_numCompilationThreads; i++) + { + std::thread compileThread(&MetalPipelineCache::CompilerThread, this); + compileThread.detach(); + } + + // open cache file or create it + cemu_assert_debug(s_cache == nullptr); + s_cache = FileCache::Open(pathCacheFile, true, LatteShaderCache_getPipelineCacheExtraVersion(cacheTitleId)); + if (!s_cache) + { + cemuLog_log(LogType::Force, "Failed to open or create Metal pipeline cache file: {}", _pathToUtf8(pathCacheFile)); + return 0; + } + else + { + s_cache->UseCompression(false); + g_mtlCacheState.pipelineMaxFileIndex = s_cache->GetMaximumFileIndex(); + } + return s_cache->GetFileCount(); +} + +bool MetalPipelineCache::UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders) +{ + pipelinesLoadedTotal = g_mtlCacheState.pipelinesLoaded; + pipelinesMissingShaders = 0; + while (s_cache && g_mtlCacheState.pipelineLoadIndex <= g_mtlCacheState.pipelineMaxFileIndex) // s_cache is null when BeginLoading could not open the file + { + if (m_compilationQueue.size() >= 50) + { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + return true; // queue up to 50 entries at a time + } + + uint64 fileNameA, fileNameB; + std::vector fileData; + if (s_cache->GetFileByIndex(g_mtlCacheState.pipelineLoadIndex, &fileNameA, &fileNameB, fileData)) + { + // queue for async compilation + g_mtlCacheState.pipelinesQueued++; + m_compilationQueue.push(std::move(fileData)); + g_mtlCacheState.pipelineLoadIndex++; + return true; + } + g_mtlCacheState.pipelineLoadIndex++; + } + if (g_mtlCacheState.pipelinesLoaded != g_mtlCacheState.pipelinesQueued) + { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + return true; // pipelines still compiling + } + return false; // done +} + +void MetalPipelineCache::EndLoading() +{ + // shut down compilation threads + uint32 threadCount = m_numCompilationThreads; + m_numCompilationThreads = 0; // signal thread shutdown 
+ for (uint32 i = 0; i < threadCount; i++) + { + m_compilationQueue.push({}); // push empty workload for every thread. Threads then will shutdown after checking for m_numCompilationThreads == 0 + } + // keep cache file open for writing of new pipelines +} + +void MetalPipelineCache::Close() +{ + if(s_cache) + { + delete s_cache; + s_cache = nullptr; + } +} + +struct CachedPipeline +{ + struct ShaderHash + { + uint64 baseHash; + uint64 auxHash; + bool isPresent{}; + + void set(uint64 baseHash, uint64 auxHash) + { + this->baseHash = baseHash; + this->auxHash = auxHash; + this->isPresent = true; + } + }; + + ShaderHash vsHash; // includes fetch shader + ShaderHash gsHash; + ShaderHash psHash; + + Latte::GPUCompactedRegisterState gpuState; +}; + +void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) +{ + static FSpinlock s_spinlockSharedInternal; + + // deserialize file + LatteContextRegister* lcr = new LatteContextRegister(); + s_spinlockSharedInternal.lock(); + CachedPipeline* cachedPipeline = new CachedPipeline(); + s_spinlockSharedInternal.unlock(); + + MemStreamReader streamReader(fileData.data(), fileData.size()); + if (!DeserializePipeline(streamReader, *cachedPipeline)) + { + // failed to deserialize + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); + return; + } + // restored register view from compacted state + Latte::LoadGPURegisterState(*lcr, cachedPipeline->gpuState); + + LatteDecompilerShader* vertexShader = nullptr; + LatteDecompilerShader* geometryShader = nullptr; + LatteDecompilerShader* pixelShader = nullptr; + // find vertex shader + if (cachedPipeline->vsHash.isPresent) + { + vertexShader = LatteSHRC_FindVertexShader(cachedPipeline->vsHash.baseHash, cachedPipeline->vsHash.auxHash); + if (!vertexShader) + { + cemuLog_logDebug(LogType::Force, "Vertex shader not found in cache"); + // free the deserialized state on this exit path too + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); + return; + } + } + // find geometry shader + if (cachedPipeline->gsHash.isPresent) + { + 
geometryShader = LatteSHRC_FindGeometryShader(cachedPipeline->gsHash.baseHash, cachedPipeline->gsHash.auxHash); + if (!geometryShader) + { + cemuLog_logDebug(LogType::Force, "Geometry shader not found in cache"); + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); + return; + } + } + // find pixel shader + if (cachedPipeline->psHash.isPresent) + { + pixelShader = LatteSHRC_FindPixelShader(cachedPipeline->psHash.baseHash, cachedPipeline->psHash.auxHash); + if (!pixelShader) + { + cemuLog_logDebug(LogType::Force, "Pixel shader not found in cache"); + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); + return; + } + } + + if (!pixelShader) + { + cemu_assert_debug(false); + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); + return; + } + + // create pipeline info + m_pipelineIsCachedLock.lock(); + m_pipelineIsCachedLock.unlock(); + throw; // NOTE(review): a bare rethrow outside a catch handler (as when called from CompilerThread) calls std::terminate + // TODO: uncomment + /* + // compile + { + MetalPipelineCompiler pp(m_mtlr); + if (!pp.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, activeFBO, activeFBO, *lcr)) + { + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); + return; + } + pp.Compile(true, true); + // destroy pp early + } + // on success, calculate pipeline hash and flag as present in cache + uint64 pipelineBaseHash = vertexShader->baseHash; + uint64 pipelineStateHash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, activeFBO, activeFBO, *lcr); + m_pipelineIsCachedLock.lock(); + m_pipelineIsCached.emplace(pipelineBaseHash, pipelineStateHash); + m_pipelineIsCachedLock.unlock(); + */ + + // clean up + s_spinlockSharedInternal.lock(); + delete lcr; + delete cachedPipeline; + s_spinlockSharedInternal.unlock(); +} + +bool MetalPipelineCache::HasPipelineCached(uint64 baseHash, uint64 pipelineStateHash) +{ + PipelineHash ph(baseHash, pipelineStateHash); + return m_pipelineIsCached.find(ph) != m_pipelineIsCached.end(); +} + +ConcurrentQueue g_mtlPipelineCachingQueue; + +void MetalPipelineCache::AddCurrentStateToCache(uint64 baseHash, uint64 pipelineStateHash) +{ + m_pipelineIsCached.emplace(baseHash, pipelineStateHash); 
+ if (!m_pipelineCacheStoreThread) + { + m_pipelineCacheStoreThread = new std::thread(&MetalPipelineCache::WorkerThread, this); + m_pipelineCacheStoreThread->detach(); + } + // fill job structure with cached GPU state + // for each cached pipeline we store: + // - Active shaders (referenced by hash) + // - An almost-complete register state of the GPU (minus some ALU uniform constants which aren't relevant) + CachedPipeline* job = new CachedPipeline(); + auto vs = LatteSHRC_GetActiveVertexShader(); + auto gs = LatteSHRC_GetActiveGeometryShader(); + auto ps = LatteSHRC_GetActivePixelShader(); + if (vs) + job->vsHash.set(vs->baseHash, vs->auxHash); + if (gs) + job->gsHash.set(gs->baseHash, gs->auxHash); + if (ps) + job->psHash.set(ps->baseHash, ps->auxHash); + Latte::StoreGPURegisterState(LatteGPUState.contextNew, job->gpuState); + // queue job + g_mtlPipelineCachingQueue.push(job); +} + +bool MetalPipelineCache::SerializePipeline(MemStreamWriter& memWriter, CachedPipeline& cachedPipeline) +{ + memWriter.writeBE(0x01); // version + uint8 presentMask = 0; + if (cachedPipeline.vsHash.isPresent) + presentMask |= 1; + if (cachedPipeline.gsHash.isPresent) + presentMask |= 2; + if (cachedPipeline.psHash.isPresent) + presentMask |= 4; + memWriter.writeBE(presentMask); + if (cachedPipeline.vsHash.isPresent) + { + memWriter.writeBE(cachedPipeline.vsHash.baseHash); + memWriter.writeBE(cachedPipeline.vsHash.auxHash); + } + if (cachedPipeline.gsHash.isPresent) + { + memWriter.writeBE(cachedPipeline.gsHash.baseHash); + memWriter.writeBE(cachedPipeline.gsHash.auxHash); + } + if (cachedPipeline.psHash.isPresent) + { + memWriter.writeBE(cachedPipeline.psHash.baseHash); + memWriter.writeBE(cachedPipeline.psHash.auxHash); + } + Latte::SerializeRegisterState(cachedPipeline.gpuState, memWriter); + return true; +} + +bool MetalPipelineCache::DeserializePipeline(MemStreamReader& memReader, CachedPipeline& cachedPipeline) +{ + // version + if (memReader.readBE() != 1) + { + 
cemuLog_log(LogType::Force, "Cached Metal pipeline corrupted or has unknown version"); + return false; + } + // shader hashes + uint8 presentMask = memReader.readBE(); + if (presentMask & 1) + { + uint64 baseHash = memReader.readBE(); + uint64 auxHash = memReader.readBE(); + cachedPipeline.vsHash.set(baseHash, auxHash); + } + if (presentMask & 2) + { + uint64 baseHash = memReader.readBE(); + uint64 auxHash = memReader.readBE(); + cachedPipeline.gsHash.set(baseHash, auxHash); + } + if (presentMask & 4) + { + uint64 baseHash = memReader.readBE(); + uint64 auxHash = memReader.readBE(); + cachedPipeline.psHash.set(baseHash, auxHash); + } + // deserialize GPU state + if (!Latte::DeserializeRegisterState(cachedPipeline.gpuState, memReader)) + { + return false; + } + cemu_assert_debug(!memReader.hasError()); + return !memReader.hasError(); // report a reader error to the caller instead of relying on the debug assert alone +} + +int MetalPipelineCache::CompilerThread() +{ + SetThreadName("plCacheCompiler"); + while (m_numCompilationThreads != 0) + { + std::vector pipelineData = m_compilationQueue.pop(); + if(pipelineData.empty()) + continue; + LoadPipelineFromCache(pipelineData); + ++g_mtlCacheState.pipelinesLoaded; + } + return 0; +} + +void MetalPipelineCache::WorkerThread() +{ + SetThreadName("plCacheWriter"); + while (true) + { + CachedPipeline* job; + g_mtlPipelineCachingQueue.pop(job); + if (!s_cache) + { + delete job; + continue; + } + // serialize + MemStreamWriter memWriter(1024 * 4); + SerializePipeline(memWriter, *job); + auto blob = memWriter.getResult(); + // file name is derived from data hash + uint8 hash[SHA256_DIGEST_LENGTH]; + SHA256(blob.data(), blob.size(), hash); + uint64 nameA = *(uint64be*)(hash + 0); + uint64 nameB = *(uint64be*)(hash + 8); + s_cache->AddFileAsync({ nameA, nameB }, blob.data(), blob.size()); + delete job; + } +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index 18b163f6d..5e6d476fe 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -1,18 +1,54 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" +#include "util/helpers/ConcurrentQueue.h" +#include "util/helpers/fspinlock.h" // TODO: binary archives class MetalPipelineCache { public: - static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + struct PipelineHash + { + PipelineHash(uint64 h0, uint64 h1) : h0(h0), h1(h1) {}; + + uint64 h0; + uint64 h1; + + bool operator==(const PipelineHash& r) const + { + return h0 == r.h0 && h1 == r.h1; + } + + struct HashFunc + { + size_t operator()(const PipelineHash& v) const + { + static_assert(sizeof(uint64) == sizeof(size_t)); + return v.h0 ^ v.h1; + } + }; + }; MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} ~MetalPipelineCache(); MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + // Cache loading + uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache + bool UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders); + void EndLoading(); + void LoadPipelineFromCache(std::span fileData); + void Close(); // called on title exit + + bool HasPipelineCached(uint64 baseHash, uint64 pipelineStateHash); + void AddCurrentStateToCache(uint64 baseHash, uint64 pipelineStateHash); + + // pipeline serialization for file + bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline); + bool 
DeserializePipeline(class MemStreamReader& memReader, struct CachedPipeline& cachedPipeline); + // Debug size_t GetPipelineCacheSize() const { return m_pipelineCache.size(); } @@ -20,4 +56,19 @@ class MetalPipelineCache class MetalRenderer* m_mtlr; std::map m_pipelineCache; + + std::thread* m_pipelineCacheStoreThread; + + std::unordered_set m_pipelineIsCached; + FSpinlock m_pipelineIsCachedLock; + class FileCache* s_cache; + + std::atomic_uint32_t m_numCompilationThreads{ 0 }; + ConcurrentQueue> m_compilationQueue; + std::atomic_uint32_t m_compilationCount; + + static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + + int CompilerThread(); + void WorkerThread(); }; From 6b47d4f61e503a7f3e43c5894e593003882917f9 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 07:48:59 +0200 Subject: [PATCH 03/20] implement pipeline cache serializing --- src/Cafe/HW/Latte/Core/LatteShaderCache.cpp | 32 +++++++--- .../Renderer/Metal/MetalPipelineCache.cpp | 62 ++++++++++++------- .../Latte/Renderer/Metal/MetalPipelineCache.h | 7 ++- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index cdb41184e..126dcc500 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -64,7 +64,7 @@ FileCache* s_shaderCacheGeneric = nullptr; // contains hardware and version inde #define SHADER_CACHE_TYPE_PIXEL (2) bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize); -void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId); +void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId); bool LatteShaderCache_updatePipelineLoadingProgress(); void 
LatteShaderCache_ShowProgress(const std::function & loadUpdateFunc, bool isPipelines); @@ -347,9 +347,9 @@ void LatteShaderCache_Load() cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad); #endif LatteShaderCache_finish(); - // if Vulkan then also load pipeline cache - if (g_renderer->GetType() == RendererAPI::Vulkan) - LatteShaderCache_LoadVulkanPipelineCache(cacheTitleId); + // if Vulkan or Metal then also load pipeline cache + if (g_renderer->GetType() == RendererAPI::Vulkan || g_renderer->GetType() == RendererAPI::Metal) + LatteShaderCache_LoadPipelineCache(cacheTitleId); g_renderer->BeginFrame(true); @@ -504,13 +504,18 @@ void LatteShaderCache_ShowProgress(const std::function & loadUpdateF } } -void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId) +void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId) { - auto& pipelineCache = VulkanPipelineStableCache::GetInstance(); - g_shaderCacheLoaderState.pipelineFileCount = pipelineCache.BeginLoading(cacheTitleId); + if (g_renderer->GetType() == RendererAPI::Vulkan) + g_shaderCacheLoaderState.pipelineFileCount = VulkanPipelineStableCache::GetInstance().BeginLoading(cacheTitleId); + else if (g_renderer->GetType() == RendererAPI::Metal) + g_shaderCacheLoaderState.pipelineFileCount = MetalPipelineCache::GetInstance().BeginLoading(cacheTitleId); g_shaderCacheLoaderState.loadedPipelines = 0; LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true); - pipelineCache.EndLoading(); + if (g_renderer->GetType() == RendererAPI::Vulkan) + VulkanPipelineStableCache::GetInstance().EndLoading(); + else if (g_renderer->GetType() == RendererAPI::Metal) + MetalPipelineCache::GetInstance().EndLoading(); if(Latte_GetStopSignal()) LatteThread_Exit(); } @@ -518,7 +523,12 @@ void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId) bool LatteShaderCache_updatePipelineLoadingProgress() 
{ uint32 pipelinesMissingShaders = 0; - return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders); + if (g_renderer->GetType() == RendererAPI::Vulkan) + return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders); + else if (g_renderer->GetType() == RendererAPI::Metal) + return MetalPipelineCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders); + + return false; } uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType) @@ -783,9 +793,11 @@ void LatteShaderCache_Close() else if (g_renderer->GetType() == RendererAPI::Metal) RendererShaderMtl::ShaderCacheLoading_Close(); - // if Vulkan then also close pipeline cache + // if Vulkan or Metal then also close pipeline cache if (g_renderer->GetType() == RendererAPI::Vulkan) VulkanPipelineStableCache::GetInstance().Close(); + else if (g_renderer->GetType() == RendererAPI::Metal) + MetalPipelineCache::GetInstance().Close(); } #include diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index ea95c2662..2922d70ce 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -15,6 +15,43 @@ #include "config/ActiveSettings.h" #include +MetalPipelineCache* g_mtlPipelineCache = nullptr; + +MetalPipelineCache& MetalPipelineCache::GetInstance() +{ + return *g_mtlPipelineCache; +} + +MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} +{ + g_mtlPipelineCache = this; +} + +MetalPipelineCache::~MetalPipelineCache() +{ + for (auto& [key, value] : m_pipelineCache) + { + value->release(); + } +} + +MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const 
LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + auto& pipeline = m_pipelineCache[hash]; + if (pipeline) + return pipeline; + + MetalPipelineCompiler compiler(m_mtlr); + compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + pipeline = compiler.Compile(false, true); + + if (!HasPipelineCached(vertexShader->baseHash, hash)) + AddCurrentStateToCache(vertexShader->baseHash, hash); + + return pipeline; +} + uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { // Hash @@ -117,27 +154,6 @@ uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchSh return stateHash; } -MetalPipelineCache::~MetalPipelineCache() -{ - for (auto& [key, value] : m_pipelineCache) - { - value->release(); - } -} - -MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) -{ - auto& pipeline = m_pipelineCache[CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr)]; - if (pipeline) - return pipeline; - - MetalPipelineCompiler compiler(m_mtlr); - compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, 
lcr); - pipeline = compiler.Compile(false, true); - - return pipeline; -} - struct { uint32 pipelineLoadIndex; @@ -181,7 +197,7 @@ uint32 MetalPipelineCache::BeginLoading(uint64 cacheTitleId) s_cache = FileCache::Open(pathCacheFile, true, LatteShaderCache_getPipelineCacheExtraVersion(cacheTitleId)); if (!s_cache) { - cemuLog_log(LogType::Force, "Failed to open or create Vulkan pipeline cache file: {}", _pathToUtf8(pathCacheFile)); + cemuLog_log(LogType::Force, "Failed to open or create Metal pipeline cache file: {}", _pathToUtf8(pathCacheFile)); return 0; } else @@ -436,7 +452,7 @@ bool MetalPipelineCache::DeserializePipeline(MemStreamReader& memReader, CachedP // version if (memReader.readBE() != 1) { - cemuLog_log(LogType::Force, "Cached Vulkan pipeline corrupted or has unknown version"); + cemuLog_log(LogType::Force, "Cached Metal pipeline corrupted or has unknown version"); return false; } // shader hashes diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index 5e6d476fe..59f61a15e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -7,7 +7,7 @@ // TODO: binary archives class MetalPipelineCache { -public: +private: struct PipelineHash { PipelineHash(uint64 h0, uint64 h1) : h0(h0), h1(h1) {}; @@ -30,7 +30,10 @@ class MetalPipelineCache }; }; - MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} +public: + static MetalPipelineCache& GetInstance(); + + MetalPipelineCache(class MetalRenderer* metalRenderer); ~MetalPipelineCache(); MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); From cd21d957b3832c78ac5364cb5f8406efc91c204a Mon 
Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 17:15:46 +0200 Subject: [PATCH 04/20] refactor fbos --- src/Cafe/CMakeLists.txt | 3 + .../Renderer/Metal/MetalAttachmentsInfo.cpp | 48 ++++++++++++++++ .../Renderer/Metal/MetalAttachmentsInfo.h | 15 +++++ .../Renderer/Metal/MetalPipelineCache.cpp | 55 +++++++++---------- .../Latte/Renderer/Metal/MetalPipelineCache.h | 4 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 45 ++++++++------- .../Renderer/Metal/MetalPipelineCompiler.h | 14 ++--- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 21 +++---- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 13 ++++- 9 files changed, 143 insertions(+), 75 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 3d1a02305..b30f8efef 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -534,6 +534,7 @@ if(APPLE) endif() if(ENABLE_METAL) + # TODO: sort alphabetically target_sources(CemuCafe PRIVATE HW/Latte/Renderer/Metal/MetalRenderer.cpp HW/Latte/Renderer/Metal/MetalRenderer.h @@ -555,6 +556,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/RendererShaderMtl.h HW/Latte/Renderer/Metal/CachedFBOMtl.cpp HW/Latte/Renderer/Metal/CachedFBOMtl.h + HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp + HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h HW/Latte/Renderer/Metal/MetalBufferAllocator.h HW/Latte/Renderer/Metal/MetalMemoryManager.cpp HW/Latte/Renderer/Metal/MetalMemoryManager.h diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp new file mode 100644 index 000000000..88a2dface --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp @@ -0,0 +1,48 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" +#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include 
"Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" + +MetalAttachmentsInfo::MetalAttachmentsInfo(class CachedFBOMtl* fbo) +{ + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + const auto& colorBuffer = fbo->colorBuffer[i]; + auto texture = static_cast(colorBuffer.texture); + if (!texture) + continue; + + colorFormats[i] = texture->format; + } + + // Depth stencil attachment + if (fbo->depthBuffer.texture) + { + auto texture = static_cast(fbo->depthBuffer.texture); + depthFormat = texture->format; + hasStencil = fbo->depthBuffer.hasStencil; + } +} + +MetalAttachmentsInfo::MetalAttachmentsInfo(const LatteContextRegister& lcr, const LatteDecompilerShader* pixelShader) +{ + uint8 cbMask = LatteMRT::GetActiveColorBufferMask(pixelShader, lcr); + bool dbMask = LatteMRT::GetActiveDepthBufferMask(lcr); + + // Color attachments + for (int i = 0; i < 8; ++i) + { + if ((cbMask & (1 << i)) == 0) + continue; + + colorFormats[i] = LatteMRT::GetColorBufferFormat(i, lcr); + } + + // Depth stencil attachment + if (dbMask) + { + Latte::E_GX2SURFFMT format = LatteMRT::GetDepthBufferFormat(lcr); + depthFormat = format; + hasStencil = GetMtlPixelFormatInfo(format, true).hasStencil; + } +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h new file mode 100644 index 000000000..c8ebe7c11 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h @@ -0,0 +1,15 @@ +#pragma once + +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" + +class MetalAttachmentsInfo +{ +public: + MetalAttachmentsInfo() = default; + MetalAttachmentsInfo(class CachedFBOMtl* fbo); + MetalAttachmentsInfo(const LatteContextRegister& lcr, const class LatteDecompilerShader* pixelShader); + + Latte::E_GX2SURFFMT colorFormats[LATTE_NUM_COLOR_TARGET] = {Latte::E_GX2SURFFMT::INVALID_FORMAT}; + Latte::E_GX2SURFFMT depthFormat = Latte::E_GX2SURFFMT::INVALID_FORMAT; + 
bool hasStencil = false; +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 2922d70ce..bb533b7f2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -11,6 +11,9 @@ #include "Cafe/HW/Latte/Core/LatteShaderCache.h" #include "Cemu/FileCache/FileCache.h" #include "HW/Latte/Core/LatteShader.h" +#include "HW/Latte/ISA/LatteReg.h" +#include "HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" #include "util/helpers/helpers.h" #include "config/ActiveSettings.h" #include @@ -35,15 +38,15 @@ MetalPipelineCache::~MetalPipelineCache() } } -MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { - uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); auto& pipeline = m_pipelineCache[hash]; if (pipeline) return pipeline; MetalPipelineCompiler compiler(m_mtlr); - compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + compiler.InitFromState(fetchShader, 
vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); pipeline = compiler.Compile(false, true); if (!HasPipelineCached(vertexShader->baseHash, hash)) @@ -52,33 +55,32 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte return pipeline; } -uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { // Hash uint64 stateHash = 0; for (int i = 0; i < Latte::GPU_LIMITS::NUM_COLOR_ATTACHMENTS; ++i) { - auto textureView = static_cast(lastUsedFBO->colorBuffer[i].texture); - if (!textureView) - continue; + Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i]; + if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT) + continue; - stateHash += textureView->GetRGBAView()->pixelFormat() + i * 31; + stateHash += GetMtlPixelFormat(format, false) + i * 31; stateHash = std::rotl(stateHash, 7); - if (activeFBO->colorBuffer[i].texture) + if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT) { stateHash += 1; stateHash = std::rotl(stateHash, 1); } } - if (lastUsedFBO->depthBuffer.texture) + if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT) { - auto textureView = static_cast(lastUsedFBO->depthBuffer.texture); - stateHash += textureView->GetRGBAView()->pixelFormat(); + stateHash += GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true); 
stateHash = std::rotl(stateHash, 7); - if (activeFBO->depthBuffer.texture) + if (activeAttachmentsInfo.depthFormat == Latte::E_GX2SURFFMT::INVALID_FORMAT) { stateHash += 1; stateHash = std::rotl(stateHash, 1); @@ -347,33 +349,28 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) return; } - // create pipeline info - m_pipelineIsCachedLock.lock(); - m_pipelineIsCachedLock.unlock(); - throw; - // TODO: uncomment - /* + MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader); + // compile { MetalPipelineCompiler pp(m_mtlr); - if (!pp.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, activeFBO, activeFBO, *lcr)) - { - s_spinlockSharedInternal.lock(); - delete lcr; - delete cachedPipeline; - s_spinlockSharedInternal.unlock(); - return; - } + pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); + //{ + // s_spinlockSharedInternal.lock(); + // delete lcr; + // delete cachedPipeline; + // s_spinlockSharedInternal.unlock(); + // return; + //} pp.Compile(true, true); // destroy pp early } // on success, calculate pipeline hash and flag as present in cache uint64 pipelineBaseHash = vertexShader->baseHash; - uint64 pipelineStateHash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, activeFBO, activeFBO, *lcr); + uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); m_pipelineIsCachedLock.lock(); m_pipelineIsCached.emplace(pipelineBaseHash, pipelineStateHash); m_pipelineIsCachedLock.unlock(); - */ // clean up s_spinlockSharedInternal.lock(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index 59f61a15e..d74b50904 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h 
@@ -36,7 +36,7 @@ class MetalPipelineCache MetalPipelineCache(class MetalRenderer* metalRenderer); ~MetalPipelineCache(); - MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); // Cache loading uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache @@ -70,7 +70,7 @@ class MetalPipelineCache ConcurrentQueue> m_compilationQueue; std::atomic_uint32_t m_compilationCount; - static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); int CompilerThread(); void WorkerThread(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 9eb29cb6a..e715ae26d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -10,6 +10,8 @@ #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Core/LatteShader.h" +#include "HW/Latte/ISA/LatteReg.h" +#include "Metal/MTLPixelFormat.hpp" static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) { @@ -189,7 +191,7 @@ extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template -void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) +void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) { // Rasterization bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); @@ -222,17 +224,16 @@ void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFB uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) { - const auto& colorBuffer = lastUsedFBO->colorBuffer[i]; - auto texture = static_cast(colorBuffer.texture); - if (!texture) - { + Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i]; + if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT) continue; - } + + MTL::PixelFormat pixelFormat = GetMtlPixelFormat(format, false); auto colorAttachment = desc->colorAttachments()->object(i); - colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); + colorAttachment->setPixelFormat(pixelFormat); // Disable writes if not in the active FBO - if (!activeFBO->colorBuffer[i].texture) + if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT) { 
colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); continue; @@ -243,7 +244,7 @@ void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFB // Blending bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; // Only float data type is blendable - if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) + if (blendEnabled && GetMtlPixelFormatInfo(format, false).dataType == MetalDataType::FLOAT) { colorAttachment->setBlendingEnabled(true); @@ -272,14 +273,12 @@ void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFB } // Depth stencil attachment - if (lastUsedFBO->depthBuffer.texture) + if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT) { - auto texture = static_cast(lastUsedFBO->depthBuffer.texture); - desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - if (lastUsedFBO->depthBuffer.hasStencil) - { - desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - } + MTL::PixelFormat pixelFormat = GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true); + desc->setDepthAttachmentPixelFormat(pixelFormat); + if (lastUsedAttachmentsInfo.hasStencil) + desc->setStencilAttachmentPixelFormat(pixelFormat); } } @@ -306,7 +305,7 @@ MetalPipelineCompiler::~MetalPipelineCompiler() m_pipelineDescriptor->release(); } -void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& 
activeAttachmentsInfo, const LatteContextRegister& lcr) { const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); @@ -314,9 +313,9 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); if (m_usesGeometryShader) - InitFromStateMesh(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + InitFromStateMesh(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); else - InitFromStateRender(fetchShader, vertexShader, pixelShader, lastUsedFBO, activeFBO, lcr); + InitFromStateRender(fetchShader, vertexShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); } MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread) @@ -358,7 +357,7 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool } } -void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { // Shaders auto vertexShaderMtl = static_cast(vertexShader->shader); @@ -437,7 +436,7 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha vertexDescriptor->release(); } - SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); + SetFragmentState(desc, 
lastUsedAttachmentsInfo, activeAttachmentsInfo, pixelShader, lcr); m_pipelineDescriptor = desc; @@ -498,7 +497,7 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha */ } -void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { auto objectShaderMtl = static_cast(vertexShader->shader); RendererShaderMtl* meshShaderMtl; @@ -517,7 +516,7 @@ void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShade desc->setObjectFunction(objectShaderMtl->GetFunction()); desc->setMeshFunction(meshShaderMtl->GetFunction()); - SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, pixelShader, lcr); m_pipelineDescriptor = desc; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index 282c174d0..e1e3e7543 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -1,11 +1,9 @@ #pragma once -#include +#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" -#include "Foundation/NSObject.hpp" -#include "HW/Latte/ISA/LatteReg.h" -#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" -#include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Cafe/HW/Latte/ISA/LatteReg.h" +#include 
"Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" class MetalPipelineCompiler { @@ -13,7 +11,7 @@ class MetalPipelineCompiler MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} ~MetalPipelineCompiler(); - void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread); @@ -30,9 +28,9 @@ class MetalPipelineCompiler */ NS::Object* m_pipelineDescriptor; - void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); - void InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + void InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* 
geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); //void TryLoadBinaryArchive(); }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 76ed4c551..17050326f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -23,6 +23,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" #include "config/CemuConfig.h" #define IMGUI_IMPL_METAL_CPP @@ -511,13 +512,13 @@ LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo) { - if (cfbo == (LatteCachedFBO*)m_state.m_activeFBO) - m_state.m_activeFBO = nullptr; + if (cfbo == (LatteCachedFBO*)m_state.m_activeFBO.m_fbo) + m_state.m_activeFBO = {nullptr}; } void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) { - m_state.m_activeFBO = (CachedFBOMtl*)cfbo; + m_state.m_activeFBO = {(CachedFBOMtl*)cfbo, MetalAttachmentsInfo((CachedFBOMtl*)cfbo)}; } void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size) @@ -1008,7 +1009,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Disable depth write when there is no depth attachment auto& depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL; bool depthWriteEnable = depthControl.get_Z_WRITE_ENABLE(); - if (!m_state.m_activeFBO->depthBuffer.texture) + if (!m_state.m_activeFBO.m_fbo->depthBuffer.texture) depthControl.set_Z_WRITE_ENABLE(false); MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew); @@ -1222,7 +1223,7 @@ void 
MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 //} // Render pipeline state - MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew); + MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); if (!renderPipelineState) return; @@ -1524,12 +1525,12 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr { if (m_encoderType == MetalEncoderType::Render) { - bool needsNewRenderPass = (m_state.m_lastUsedFBO == nullptr); + bool needsNewRenderPass = (m_state.m_lastUsedFBO.m_fbo == nullptr); if (!needsNewRenderPass) { for (uint8 i = 0; i < 8; i++) { - if (m_state.m_activeFBO->colorBuffer[i].texture && m_state.m_activeFBO->colorBuffer[i].texture != m_state.m_lastUsedFBO->colorBuffer[i].texture) + if (m_state.m_activeFBO.m_fbo->colorBuffer[i].texture && m_state.m_activeFBO.m_fbo->colorBuffer[i].texture != m_state.m_lastUsedFBO.m_fbo->colorBuffer[i].texture) { needsNewRenderPass = true; break; @@ -1539,7 +1540,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr if (!needsNewRenderPass) { - if (m_state.m_activeFBO->depthBuffer.texture && (m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture || ( m_state.m_activeFBO->depthBuffer.hasStencil && !m_state.m_lastUsedFBO->depthBuffer.hasStencil))) + if (m_state.m_activeFBO.m_fbo->depthBuffer.texture && (m_state.m_activeFBO.m_fbo->depthBuffer.texture != m_state.m_lastUsedFBO.m_fbo->depthBuffer.texture || ( m_state.m_activeFBO.m_fbo->depthBuffer.hasStencil && !m_state.m_lastUsedFBO.m_fbo->depthBuffer.hasStencil))) { needsNewRenderPass = true; } @@ -1557,7 +1558,7 @@ 
MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr auto commandBuffer = GetCommandBuffer(); - auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO->GetRenderPassDescriptor()); + auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO.m_fbo->GetRenderPassDescriptor()); #ifdef CEMU_DEBUG_ASSERT renderCommandEncoder->setLabel(GetLabel("Render command encoder", renderCommandEncoder)); #endif @@ -1716,7 +1717,7 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader) // If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) { - auto colorTarget = m_state.m_activeFBO->colorBuffer[i].texture; + auto colorTarget = m_state.m_activeFBO.m_fbo->colorBuffer[i].texture; if (colorTarget && colorTarget->baseTexture == baseTexture) return true; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 526f33a5c..93c9a56d8 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -5,6 +5,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" struct MetalBufferAllocation { @@ -121,6 +122,12 @@ struct MetalStreamoutState sint32 verticesPerInstance; }; +struct MetalActiveFBOState +{ + class CachedFBOMtl* m_fbo = nullptr; + MetalAttachmentsInfo m_attachmentsInfo; +}; + struct MetalState { MetalEncoderState m_encoderState{}; @@ -130,9 +137,9 @@ struct MetalState bool m_skipDrawSequence = false; bool m_isFirstDrawInRenderPass = true; - class CachedFBOMtl* m_activeFBO = nullptr; - // If the FBO changes, but it's the same FBO as the last one with some 
omitted attachments, this FBO doesn't change' - class CachedFBOMtl* m_lastUsedFBO = nullptr; + MetalActiveFBOState m_activeFBO; + // If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change + MetalActiveFBOState m_lastUsedFBO; MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}}; // TODO: find out what is the max number of bound textures on the Wii U From 944cc8be7d4f721dad7d3320f3c8eefe87197f17 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 17:47:47 +0200 Subject: [PATCH 05/20] store loaded pipelines --- .../Renderer/Metal/MetalPipelineCache.cpp | 29 +++++++------- .../Latte/Renderer/Metal/MetalPipelineCache.h | 39 ++++--------------- 2 files changed, 20 insertions(+), 48 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index bb533b7f2..910794aa5 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -14,6 +14,7 @@ #include "HW/Latte/ISA/LatteReg.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" #include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" +#include "Metal/MTLRenderPipeline.hpp" #include "util/helpers/helpers.h" #include "config/ActiveSettings.h" #include @@ -49,8 +50,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); pipeline = compiler.Compile(false, true); - if (!HasPipelineCached(vertexShader->baseHash, hash)) - AddCurrentStateToCache(vertexShader->baseHash, hash); + AddCurrentStateToCache(hash); return pipeline; } @@ -351,6 +351,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader); + MTL::RenderPipelineState* pipeline = nullptr; // compile { MetalPipelineCompiler 
pp(m_mtlr); @@ -362,15 +363,18 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) // s_spinlockSharedInternal.unlock(); // return; //} - pp.Compile(true, true); + pipeline = pp.Compile(true, true); // destroy pp early } + // on success, calculate pipeline hash and flag as present in cache - uint64 pipelineBaseHash = vertexShader->baseHash; - uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); - m_pipelineIsCachedLock.lock(); - m_pipelineIsCached.emplace(pipelineBaseHash, pipelineStateHash); - m_pipelineIsCachedLock.unlock(); + if (pipeline) + { + uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); + m_pipelineCacheLock.lock(); + m_pipelineCache[pipelineStateHash] = pipeline; + m_pipelineCacheLock.unlock(); + } // clean up s_spinlockSharedInternal.lock(); @@ -379,17 +383,10 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) s_spinlockSharedInternal.unlock(); } -bool MetalPipelineCache::HasPipelineCached(uint64 baseHash, uint64 pipelineStateHash) -{ - PipelineHash ph(baseHash, pipelineStateHash); - return m_pipelineIsCached.find(ph) != m_pipelineIsCached.end(); -} - ConcurrentQueue g_mtlPipelineCachingQueue; -void MetalPipelineCache::AddCurrentStateToCache(uint64 baseHash, uint64 pipelineStateHash) +void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash) { - m_pipelineIsCached.emplace(baseHash, pipelineStateHash); if (!m_pipelineCacheStoreThread) { m_pipelineCacheStoreThread = new std::thread(&MetalPipelineCache::WorkerThread, this); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index d74b50904..be26bdee0 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -7,29 +7,6 @@ // TODO: binary archives class MetalPipelineCache { -private: - struct PipelineHash - { - PipelineHash(uint64 h0, uint64 h1) : h0(h0), h1(h1) {}; - - uint64 h0; - uint64 h1; - - bool operator==(const PipelineHash& r) const - { - return h0 == r.h0 && h1 == r.h1; - } - - struct HashFunc - { - size_t operator()(const PipelineHash& v) const - { - static_assert(sizeof(uint64) == sizeof(size_t)); - return v.h0 ^ v.h1; - } - }; - }; - public: static MetalPipelineCache& GetInstance(); @@ -45,13 +22,6 @@ class MetalPipelineCache void LoadPipelineFromCache(std::span fileData); void Close(); // called on title exit - bool HasPipelineCached(uint64 baseHash, uint64 pipelineStateHash); - void AddCurrentStateToCache(uint64 baseHash, uint64 pipelineStateHash); - - // pipeline serialization for file - bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline); - bool DeserializePipeline(class MemStreamReader& memReader, struct CachedPipeline& cachedPipeline); - // Debug size_t GetPipelineCacheSize() const { return m_pipelineCache.size(); } @@ -59,11 +29,10 @@ class MetalPipelineCache class MetalRenderer* m_mtlr; std::map m_pipelineCache; + FSpinlock m_pipelineCacheLock; std::thread* m_pipelineCacheStoreThread; - std::unordered_set m_pipelineIsCached; - FSpinlock m_pipelineIsCachedLock; class FileCache* s_cache; std::atomic_uint32_t m_numCompilationThreads{ 0 }; @@ -72,6 +41,12 @@ class MetalPipelineCache static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + void AddCurrentStateToCache(uint64 pipelineStateHash); + + // pipeline serialization for file + bool 
SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline); + bool DeserializePipeline(class MemStreamReader& memReader, struct CachedPipeline& cachedPipeline); + int CompilerThread(); void WorkerThread(); }; From 79f5586c6ce51835bc73ddcdbb38748b87611e56 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 18:32:12 +0200 Subject: [PATCH 06/20] report pipeline compilation count --- .../Renderer/Metal/MetalPipelineCache.cpp | 4 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 51 +++++++++++-------- .../Renderer/Metal/MetalPipelineCompiler.h | 2 +- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 910794aa5..4a202cc44 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -48,7 +48,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte MetalPipelineCompiler compiler(m_mtlr); compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); - pipeline = compiler.Compile(false, true); + pipeline = compiler.Compile(false, true, true); AddCurrentStateToCache(hash); @@ -363,7 +363,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) // s_spinlockSharedInternal.unlock(); // return; //} - pipeline = pp.Compile(true, true); + pipeline = pp.Compile(true, true, false); // destroy pp early } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index e715ae26d..33d1ee7fc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -10,8 +10,11 @@ #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include 
"Cafe/HW/Latte/Core/LatteShader.h" -#include "HW/Latte/ISA/LatteReg.h" -#include "Metal/MTLPixelFormat.hpp" +#include + +extern std::atomic_int g_compiling_pipelines; +extern std::atomic_int g_compiling_pipelines_async; +extern std::atomic_uint64_t g_compiling_pipelines_syncTimeSum; static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) { @@ -318,8 +321,12 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c InitFromStateRender(fetchShader, vertexShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); } -MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread) +MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) { + MTL::RenderPipelineState* pipeline = nullptr; + NS::Error* error = nullptr; + + auto start = std::chrono::high_resolution_clock::now(); if (m_usesGeometryShader) { auto desc = static_cast(m_pipelineDescriptor); @@ -328,15 +335,7 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Mesh render pipeline state", desc)); #endif - MTL::RenderPipelineState* pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); - desc->release(); - if (error) - { - cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - - return pipeline; + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); } else { @@ -346,15 +345,27 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Render pipeline state", desc)); #endif - 
MTL::RenderPipelineState* pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); - if (error) - { - cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - - return pipeline; + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); } + auto end = std::chrono::high_resolution_clock::now(); + + auto creationDuration = std::chrono::duration_cast(end - start).count(); + + if (error) + { + cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + else if (showInOverlay) + { + if (isRenderThread) + g_compiling_pipelines_syncTimeSum += creationDuration; + else + g_compiling_pipelines_async++; + g_compiling_pipelines++; + } + + return pipeline; } void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index e1e3e7543..39a4b8a4c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -13,7 +13,7 @@ class MetalPipelineCompiler void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); - MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread); + 
MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread, bool showInOverlay); private: class MetalRenderer* m_mtlr; From d1c69e99459f90b86e9fe1b9ae5bd2b70b766d38 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 19:19:16 +0200 Subject: [PATCH 07/20] set shader just before compiling --- .../Renderer/Metal/MetalPipelineCompiler.cpp | 64 ++++++++++--------- .../Renderer/Metal/MetalPipelineCompiler.h | 7 +- 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 33d1ee7fc..ee01f04bb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -10,6 +10,8 @@ #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Core/LatteShader.h" +#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" +#include "HW/Latte/Renderer/RendererShader.h" #include extern std::atomic_int g_compiling_pipelines; @@ -194,7 +196,7 @@ extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template -void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) +void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { // Rasterization bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); @@ -211,16 +213,13 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn if (cullFront && cullBack) rasterizationEnabled = false; - auto pixelShaderMtl = static_cast(pixelShader->shader); - - if (!rasterizationEnabled || !pixelShaderMtl) + // TODO: check if the pixel 
shader is valid as well? + if (!rasterizationEnabled/* || !pixelShaderMtl*/) { desc->setRasterizationEnabled(false); return; } - desc->setFragmentFunction(pixelShaderMtl->GetFunction()); - // Color attachments const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); @@ -310,15 +309,29 @@ MetalPipelineCompiler::~MetalPipelineCompiler() void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { + // Shaders + m_vertexShader = static_cast(vertexShader->shader); + if (geometryShader) + { + m_geometryShader = static_cast(geometryShader->shader); + } + else + { + // If there is no geometry shader, it means that we are emulating rects + m_geometryShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); + } + m_pixelShader = static_cast(pixelShader->shader); + + // Check if the pipeline uses a geometry shader const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); if (m_usesGeometryShader) - InitFromStateMesh(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); else - InitFromStateRender(fetchShader, vertexShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); } MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool 
forceCompile, bool isRenderThread, bool showInOverlay) @@ -331,6 +344,11 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool { auto desc = static_cast(m_pipelineDescriptor); + // Shaders + desc->setObjectFunction(m_vertexShader->GetFunction()); + desc->setMeshFunction(m_geometryShader->GetFunction()); + desc->setFragmentFunction(m_pixelShader->GetFunction()); + NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Mesh render pipeline state", desc)); @@ -341,6 +359,10 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool { auto desc = static_cast(m_pipelineDescriptor); + // Shaders + desc->setVertexFunction(m_vertexShader->GetFunction()); + desc->setFragmentFunction(m_pixelShader->GetFunction()); + NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Render pipeline state", desc)); @@ -368,14 +390,10 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool return pipeline; } -void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { - // Shaders - auto vertexShaderMtl = static_cast(vertexShader->shader); - // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexShaderMtl->GetFunction()); // Vertex descriptor if (!fetchShader->mtlFetchVertexManually) @@ -447,7 +465,7 @@ void MetalPipelineCompiler::InitFromStateRender(const 
LatteFetchShader* fetchSha vertexDescriptor->release(); } - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, pixelShader, lcr); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); m_pipelineDescriptor = desc; @@ -508,26 +526,12 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha */ } -void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { - auto objectShaderMtl = static_cast(vertexShader->shader); - RendererShaderMtl* meshShaderMtl; - if (geometryShader) - { - meshShaderMtl = static_cast(geometryShader->shader); - } - else - { - // If there is no geometry shader, it means that we are emulating rects - meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); - } - // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - desc->setObjectFunction(objectShaderMtl->GetFunction()); - desc->setMeshFunction(meshShaderMtl->GetFunction()); - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, pixelShader, lcr); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); m_pipelineDescriptor = desc; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index 39a4b8a4c..4f0febefb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -18,6 +18,9 @@ class MetalPipelineCompiler private: class MetalRenderer* m_mtlr; + const class RendererShaderMtl* m_vertexShader; + const class RendererShaderMtl* m_geometryShader; + const class RendererShaderMtl* m_pixelShader; bool m_usesGeometryShader; /* @@ -28,9 +31,9 @@ class MetalPipelineCompiler */ NS::Object* m_pipelineDescriptor; - void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); - void InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); //void TryLoadBinaryArchive(); }; From cbde7f983cdd2ec5736281a79257f759349b41b4 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 19:48:32 +0200 Subject: [PATCH 08/20] force compile shaders if needed --- .../Renderer/Metal/MetalPipelineCompiler.cpp | 55 +++++++++++++------ .../Renderer/Metal/MetalPipelineCompiler.h | 6 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 28 ++++------ 3 files changed, 51 insertions(+), 38 deletions(-) diff --git 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index ee01f04bb..54aa83b1b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -309,25 +309,22 @@ MetalPipelineCompiler::~MetalPipelineCompiler() void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { - // Shaders - m_vertexShader = static_cast(vertexShader->shader); - if (geometryShader) - { - m_geometryShader = static_cast(geometryShader->shader); - } - else - { - // If there is no geometry shader, it means that we are emulating rects - m_geometryShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); - } - m_pixelShader = static_cast(pixelShader->shader); - // Check if the pipeline uses a geometry shader const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + // Shaders + m_vertexShaderMtl = static_cast(vertexShader->shader); + if (geometryShader) + m_geometryShaderMtl = static_cast(geometryShader->shader); + else if (isPrimitiveRect) + m_geometryShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); + else + m_geometryShaderMtl = nullptr; + m_pixelShaderMtl = static_cast(pixelShader->shader); + if (m_usesGeometryShader) InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); else @@ -336,6 +333,28 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c 
MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) { + if (forceCompile) + { + // if some shader stages are not compiled yet, compile them now + if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled()) + m_vertexShaderMtl->PreponeCompilation(isRenderThread); + if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled()) + m_geometryShaderMtl->PreponeCompilation(isRenderThread); + if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled()) + m_pixelShaderMtl->PreponeCompilation(isRenderThread); + } + else + { + // fail early if some shader stages are not compiled + if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled()) + return nullptr; + if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled()) + return nullptr; + if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled()) + return nullptr; + } + + // Compile MTL::RenderPipelineState* pipeline = nullptr; NS::Error* error = nullptr; @@ -345,9 +364,9 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool auto desc = static_cast(m_pipelineDescriptor); // Shaders - desc->setObjectFunction(m_vertexShader->GetFunction()); - desc->setMeshFunction(m_geometryShader->GetFunction()); - desc->setFragmentFunction(m_pixelShader->GetFunction()); + desc->setObjectFunction(m_vertexShaderMtl->GetFunction()); + desc->setMeshFunction(m_geometryShaderMtl->GetFunction()); + desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT @@ -360,8 +379,8 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool auto desc = static_cast(m_pipelineDescriptor); // Shaders - desc->setVertexFunction(m_vertexShader->GetFunction()); - desc->setFragmentFunction(m_pixelShader->GetFunction()); + desc->setVertexFunction(m_vertexShaderMtl->GetFunction()); + desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT diff 
--git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index 4f0febefb..f39b1fb5e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -18,9 +18,9 @@ class MetalPipelineCompiler private: class MetalRenderer* m_mtlr; - const class RendererShaderMtl* m_vertexShader; - const class RendererShaderMtl* m_geometryShader; - const class RendererShaderMtl* m_pixelShader; + class RendererShaderMtl* m_vertexShaderMtl; + class RendererShaderMtl* m_geometryShaderMtl; + class RendererShaderMtl* m_pixelShaderMtl; bool m_usesGeometryShader; /* diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 17050326f..2b420e6e2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -944,15 +944,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Shaders LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); - if (vertexShader && !vertexShader->shader->IsCompiled()) - return; LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); - if (geometryShader && !geometryShader->shader->IsCompiled()) - return; LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); const auto fetchShader = LatteSHRC_GetActiveFetchShader(); - if (vertexShader && !pixelShader->shader->IsCompiled()) - return; bool neverSkipAccurateBarrier = false; @@ -1004,6 +998,17 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Render pass auto renderCommandEncoder = GetRenderCommandEncoder(); + // Render pipeline state + MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, 
m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); + if (!renderPipelineState) + return; + + if (renderPipelineState != encoderState.m_renderPipelineState) + { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + encoderState.m_renderPipelineState = renderPipelineState; + } + // Depth stencil state // Disable depth write when there is no depth attachment @@ -1222,17 +1227,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex); //} - // Render pipeline state - MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); - if (!renderPipelineState) - return; - - if (renderPipelineState != encoderState.m_renderPipelineState) - { - renderCommandEncoder->setRenderPipelineState(renderPipelineState); - encoderState.m_renderPipelineState = renderPipelineState; - } - // Prepare streamout m_state.m_streamoutState.verticesPerInstance = count; LatteStreamout_PrepareDrawcall(count, instanceCount); From 4dcb858ab8acf6036c29132fcb2b9d1149e28f86 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 20:03:26 +0200 Subject: [PATCH 09/20] check if pipeline is eligible for serializing --- .../Renderer/Metal/MetalPipelineCache.cpp | 12 +++++++++--- .../Renderer/Metal/MetalPipelineCompiler.cpp | 18 ++++++++++-------- .../Renderer/Metal/MetalPipelineCompiler.h | 6 +++--- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 4a202cc44..476417d3f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp 
@@ -10,6 +10,7 @@ #include "Cafe/HW/Latte/Common/RegisterSerializer.h" #include "Cafe/HW/Latte/Core/LatteShaderCache.h" #include "Cemu/FileCache/FileCache.h" +#include "Common/precompiled.h" #include "HW/Latte/Core/LatteShader.h" #include "HW/Latte/ISA/LatteReg.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" @@ -47,10 +48,13 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte return pipeline; MetalPipelineCompiler compiler(m_mtlr); - compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + bool fbosMatch; + compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); pipeline = compiler.Compile(false, true, true); - AddCurrentStateToCache(hash); + // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache + if (fbosMatch) + AddCurrentStateToCache(hash); return pipeline; } @@ -355,7 +359,9 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) // compile { MetalPipelineCompiler pp(m_mtlr); - pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); + bool fbosMatch; + pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr, fbosMatch); + cemu_assert_debug(fbosMatch); //{ // s_spinlockSharedInternal.lock(); // delete lcr; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 54aa83b1b..9b865fb51 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -196,7 +196,7 @@ extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template -void SetFragmentState(T* desc, 
const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) { // Rasterization bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); @@ -221,6 +221,7 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn } // Color attachments + fbosMatch = true; const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); @@ -238,6 +239,7 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT) { colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); + fbosMatch = false; continue; } @@ -307,7 +309,7 @@ MetalPipelineCompiler::~MetalPipelineCompiler() m_pipelineDescriptor->release(); } -void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) { // Check if the pipeline uses a geometry shader const LattePrimitiveMode primitiveMode = 
static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); @@ -326,9 +328,9 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c m_pixelShaderMtl = static_cast(pixelShader->shader); if (m_usesGeometryShader) - InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); else - InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); } MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) @@ -409,7 +411,7 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool return pipeline; } -void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) { // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); @@ -484,7 +486,7 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha vertexDescriptor->release(); } - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); m_pipelineDescriptor = desc; @@ -545,12 +547,12 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha */ } -void 
MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) { // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); m_pipelineDescriptor = desc; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index f39b1fb5e..3b9731a3e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -11,7 +11,7 @@ class MetalPipelineCompiler MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} ~MetalPipelineCompiler(); - void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread, bool 
showInOverlay); @@ -31,9 +31,9 @@ class MetalPipelineCompiler */ NS::Object* m_pipelineDescriptor; - void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); - void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); //void TryLoadBinaryArchive(); }; From 7d9194a738abf9697c77167e37fc96c18c581254 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 15 Oct 2024 20:24:04 +0200 Subject: [PATCH 10/20] don't overshadow error --- src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 9b865fb51..94292e046 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -370,7 +370,6 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool desc->setMeshFunction(m_geometryShaderMtl->GetFunction()); desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); - NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Mesh render pipeline 
state", desc)); #endif @@ -384,7 +383,6 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool desc->setVertexFunction(m_vertexShaderMtl->GetFunction()); desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); - NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Render pipeline state", desc)); #endif From 8f2385a69071d5febbe1359001c34eb5e49cc927 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 16 Oct 2024 19:20:25 +0200 Subject: [PATCH 11/20] use lcr instead of contextNew --- src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 94292e046..910b354b1 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -264,8 +264,8 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) { colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); - colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); - colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); + colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); + colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); } else { @@ -312,7 +312,7 @@ MetalPipelineCompiler::~MetalPipelineCompiler() void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& 
activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) { // Check if the pipeline uses a geometry shader - const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); + const LattePrimitiveMode primitiveMode = static_cast(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE()); bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); From 15eb6bb37f0011257d040791dac9b549baecbf4f Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 19 Oct 2024 09:29:14 +0200 Subject: [PATCH 12/20] fix: pipeline cache with mesh shaders --- .../Renderer/Metal/MetalPipelineCache.cpp | 13 +++-- .../Renderer/Metal/MetalPipelineCompiler.cpp | 48 ++++++++++--------- .../Renderer/Metal/MetalPipelineCompiler.h | 1 + 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 476417d3f..214c822ff 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -43,19 +43,21 @@ MetalPipelineCache::~MetalPipelineCache() MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); - auto& pipeline = m_pipelineCache[hash]; - if (pipeline) - return pipeline; + auto it = m_pipelineCache.find(hash); + if (it != m_pipelineCache.end()) + return it->second; MetalPipelineCompiler compiler(m_mtlr); 
bool fbosMatch; compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); - pipeline = compiler.Compile(false, true, true); + MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true); // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache if (fbosMatch) AddCurrentStateToCache(hash); + m_pipelineCache.insert({hash, pipeline}); + return pipeline; } @@ -355,6 +357,9 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader); + // TODO: this shouldn't probably be called directly + LatteShader_UpdatePSInputs(lcr->GetRawView()); + MTL::RenderPipelineState* pipeline = nullptr; // compile { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 910b354b1..d46358853 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -10,8 +10,7 @@ #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Core/LatteShader.h" -#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" -#include "HW/Latte/Renderer/RendererShader.h" + #include extern std::atomic_int g_compiling_pipelines; @@ -196,23 +195,8 @@ extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template -void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) +void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr, bool& fbosMatch) { - // Rasterization - bool rasterizationEnabled = 
!lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - - // HACK - // TODO: include this in the hash? - if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; - - // Culling both front and back faces effectively disables rasterization - const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; - // TODO: check if the pixel shader is valid as well? if (!rasterizationEnabled/* || !pixelShaderMtl*/) { @@ -317,6 +301,21 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + // Rasterization + m_rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + // TODO: include this in the hash? + if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + m_rasterizationEnabled = true; + + // Culling both front and back faces effectively disables rasterization + const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + m_rasterizationEnabled = false; + // Shaders m_vertexShaderMtl = static_cast(vertexShader->shader); if (geometryShader) @@ -368,7 +367,8 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool // Shaders desc->setObjectFunction(m_vertexShaderMtl->GetFunction()); desc->setMeshFunction(m_geometryShaderMtl->GetFunction()); - desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); + if (m_rasterizationEnabled) + desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Mesh render pipeline state", desc)); @@ -381,7 +381,8 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool // Shaders 
desc->setVertexFunction(m_vertexShaderMtl->GetFunction()); - desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); + if (m_rasterizationEnabled) + desc->setFragmentFunction(m_pixelShaderMtl->GetFunction()); #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Render pipeline state", desc)); @@ -397,7 +398,8 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); error->release(); } - else if (showInOverlay) + + if (showInOverlay) { if (isRenderThread) g_compiling_pipelines_syncTimeSum += creationDuration; @@ -484,7 +486,7 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha vertexDescriptor->release(); } - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); m_pipelineDescriptor = desc; @@ -550,7 +552,7 @@ void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShade // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); m_pipelineDescriptor = desc; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index 3b9731a3e..5965c764a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -22,6 +22,7 @@ class MetalPipelineCompiler class RendererShaderMtl* m_geometryShaderMtl; class RendererShaderMtl* m_pixelShaderMtl; bool m_usesGeometryShader; + bool m_rasterizationEnabled; /* std::map m_pipelineCache; From 
295a6ed9fd07f3791c2a71db5ae1776775a4a5bf Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 19 Oct 2024 09:39:55 +0200 Subject: [PATCH 13/20] only add pipeline to cache if compilation was attempted --- .../HW/Latte/Renderer/Metal/MetalPipelineCache.cpp | 11 ++++++++--- .../HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp | 5 ++++- .../HW/Latte/Renderer/Metal/MetalPipelineCompiler.h | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 214c822ff..07277e68f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -50,13 +50,16 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte MetalPipelineCompiler compiler(m_mtlr); bool fbosMatch; compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); - MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true); + bool attemptedCompilation = false; + MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true, attemptedCompilation); // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache if (fbosMatch) AddCurrentStateToCache(hash); - m_pipelineCache.insert({hash, pipeline}); + // Place the pipeline to the cache if the compilation was at least attempted + if (attemptedCompilation) + m_pipelineCache.insert({hash, pipeline}); return pipeline; } @@ -374,7 +377,9 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) // s_spinlockSharedInternal.unlock(); // return; //} - pipeline = pp.Compile(true, true, false); + bool attemptedCompilation = false; + pipeline = pp.Compile(true, true, false, attemptedCompilation); + cemu_assert_debug(attemptedCompilation); // destroy pp early } diff --git 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index d46358853..73b86fe91 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -332,7 +332,7 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); } -MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) +MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation) { if (forceCompile) { @@ -408,6 +408,9 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool g_compiling_pipelines++; } + // Inform the pipeline cache that compilation was at least attempted + attemptedCompilation = true; + return pipeline; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index 5965c764a..e40675559 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -13,7 +13,7 @@ class MetalPipelineCompiler void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); - MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread, bool showInOverlay); + MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation); private: class MetalRenderer* m_mtlr; From 
17507157914a4f0f30eb7f1454d9ec845ea7610e Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 19 Oct 2024 15:32:45 +0200 Subject: [PATCH 14/20] retrieve ps input table without using global variable --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 35 +++++----- src/Cafe/HW/Latte/Core/LatteShader.h | 3 +- .../Renderer/Metal/MetalPipelineCache.cpp | 5 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 67 +++++++++---------- 4 files changed, 54 insertions(+), 56 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index bc1279c32..9e3e6b1f6 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -209,11 +209,9 @@ void LatteShader_free(LatteDecompilerShader* shader) delete shader; } -// both vertex and geometry/pixel shader depend on PS inputs -// we prepare the PS import info in advance -void LatteShader_UpdatePSInputs(uint32* contextRegisters) +void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters) { - // PS control + // PS control uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 spi0_positionEnable = (psControl0 >> 8) & 1; uint32 spi0_positionCentroid = (psControl0 >> 9) & 1; @@ -242,12 +240,12 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters) { key += std::rotr(spi0_paramGen, 7); key += std::rotr(spi0_paramGenAddr, 3); - _activePSImportTable.paramGen = spi0_paramGen; - _activePSImportTable.paramGenGPR = spi0_paramGenAddr; + psInputTable->paramGen = spi0_paramGen; + psInputTable->paramGenGPR = spi0_paramGenAddr; } else { - _activePSImportTable.paramGen = 0; + psInputTable->paramGen = 0; } // semantic imports from vertex shader @@ -281,9 +279,9 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters) key = std::rotl(key, 7); if (spi0_positionEnable && f == spi0_positionAddr) { - _activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION; - _activePSImportTable.import[f].isFlat 
= false; - _activePSImportTable.import[f].isNoPerspective = false; + psInputTable->import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION; + psInputTable->import[f].isFlat = false; + psInputTable->import[f].isNoPerspective = false; key += (uint64)0x33; } else @@ -296,13 +294,20 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters) semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7)); #endif - _activePSImportTable.import[f].semanticId = psSemanticId; - _activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0; - _activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0; + psInputTable->import[f].semanticId = psSemanticId; + psInputTable->import[f].isFlat = (psInputControl&(1 << 10)) != 0; + psInputTable->import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0; } } - _activePSImportTable.key = key; - _activePSImportTable.count = numPSInputs; + psInputTable->key = key; + psInputTable->count = numPSInputs; +} + +// both vertex and geometry/pixel shader depend on PS inputs +// we prepare the PS import info in advance +void LatteShader_UpdatePSInputs(uint32* contextRegisters) +{ + LatteShader_CreatePSInputTable(&_activePSImportTable, contextRegisters); } void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.h b/src/Cafe/HW/Latte/Core/LatteShader.h index f8dc6d1a3..85d53b01b 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.h +++ b/src/Cafe/HW/Latte/Core/LatteShader.h @@ -84,6 +84,7 @@ struct LatteShaderPSInputTable } }; +void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters); void LatteShader_UpdatePSInputs(uint32* contextRegisters); LatteShaderPSInputTable* LatteSHRC_GetPSInputTable(); @@ -126,4 +127,4 @@ void LatteShaderCache_writeSeparableGeometryShader(uint64 shaderBaseHash, uint64 void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* 
pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader); // todo - refactor this -sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType); \ No newline at end of file +sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 07277e68f..9e49959c9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -54,7 +54,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true, attemptedCompilation); // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache - if (fbosMatch) + if (pipeline && fbosMatch) AddCurrentStateToCache(hash); // Place the pipeline to the cache if the compilation was at least attempted @@ -360,9 +360,6 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader); - // TODO: this shouldn't probably be called directly - LatteShader_UpdatePSInputs(lcr->GetRawView()); - MTL::RenderPipelineState* pipeline = nullptr; // compile { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 73b86fe91..a8bce2913 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -17,18 +17,18 @@ extern std::atomic_int g_compiling_pipelines; extern std::atomic_int g_compiling_pipelines_async; extern std::atomic_uint64_t g_compiling_pipelines_syncTimeSum; -static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const 
LatteContextRegister& latteRegister) +static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) { auto parameterMask = vertexShader->outputParameterMask; for (uint32 i = 0; i < 32; i++) { if ((parameterMask & (1 << i)) == 0) continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); if (vsSemanticId < 0) continue; // make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + if (!psInputTable.hasPSImportForSemanticId(vsSemanticId)) continue; gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId)); } @@ -36,18 +36,18 @@ static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteD gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx)); } -static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) +static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, const char* variant, const LatteContextRegister& latteRegister) { auto parameterMask = vertexShader->outputParameterMask; for (uint32 i = 0; i < 32; i++) { if ((parameterMask & (1 << i)) == 0) continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); if (vsSemanticId < 0) continue; // make sure PS has matching input - if 
(!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + if (!psInputTable.hasPSImportForSemanticId(vsSemanticId)) continue; gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); } @@ -55,7 +55,7 @@ static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const Lat gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n")); } -static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) +static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) { sint32 pList[4] = { p0, p1, p2, p3 }; for (sint32 i = 0; i < 4; i++) @@ -79,7 +79,8 @@ static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer gsSrc.append("#include \r\n"); gsSrc.append("using namespace metal;\r\n"); - LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); + LatteShaderPSInputTable psInputTable; + LatteShader_CreatePSInputTable(&psInputTable, latteRegister.GetRawView()); // inputs & outputs std::string vertexOutDefinition = "struct VertexOut {\r\n"; @@ -87,35 +88,29 @@ static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer std::string geometryOutDefinition = "struct GeometryOut {\r\n"; geometryOutDefinition += "float4 position [[position]];\r\n"; auto parameterMask = vertexShader->outputParameterMask; - for (sint32 f = 0; f < 2; f++) + for (uint32 i = 0; i < 32; i++) { - for (uint32 i = 0; i < 32; i++) - { - if 
((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId); - if (psImport == nullptr) - continue; - - if (f == 0) - { - vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); - } - else - { - geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); - - geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)); - if (psImport->isFlat) - geometryOutDefinition += " [[flat]]"; - if (psImport->isNoPerspective) - geometryOutDefinition += " [[center_no_perspective]]"; - geometryOutDefinition += ";\r\n"; - } - } + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + auto psImport = psInputTable.getPSImportBySemanticId(vsSemanticId); + if (psImport == nullptr) + continue; + + // VertexOut + vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); + + // GeometryOut + geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); + + geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable.getPSImportLocationBySemanticId(vsSemanticId)); + if (psImport->isFlat) + geometryOutDefinition += " [[flat]]"; + if (psImport->isNoPerspective) + geometryOutDefinition += " [[center_no_perspective]]"; + geometryOutDefinition += ";\r\n"; } vertexOutDefinition += "};\r\n"; geometryOutDefinition += "};\r\n"; From b8021b642d31d0ff7514460690a84e3959c010da Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 24 Oct 2024 17:15:24 +0200 Subject: [PATCH 15/20] fix: incorrect texture usages --- src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp | 5 +---- 
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index 142870501..c6a5012bd 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -65,7 +65,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM } else if (textureType == MTL::TextureTypeCube) { - // Do notjing + // Do nothing } else if (textureType == MTL::TextureTypeCubeArray) { @@ -81,13 +81,10 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM MTL::TextureUsage usage = MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView; if (!Latte::IsCompressedFormat(format)) - { usage |= MTL::TextureUsageRenderTarget; - } desc->setUsage(usage); m_texture = mtlRenderer->GetDevice()->newTexture(desc); - desc->release(); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 2b420e6e2..e560c2c33 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -23,7 +23,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" -#include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" #include "config/CemuConfig.h" #define IMGUI_IMPL_METAL_CPP @@ -70,6 +69,7 @@ MetalRenderer::MetalRenderer() MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType1D); textureDescriptor->setWidth(1); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); m_nullTexture1D = m_device->newTexture(textureDescriptor); #ifdef CEMU_DEBUG_ASSERT m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D)); @@ 
-77,6 +77,7 @@ MetalRenderer::MetalRenderer() textureDescriptor->setTextureType(MTL::TextureType2D); textureDescriptor->setHeight(1); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageRenderTarget); m_nullTexture2D = m_device->newTexture(textureDescriptor); #ifdef CEMU_DEBUG_ASSERT m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D)); From 665eb23e4a48d480cf5cbc08a9e77350ddf85e36 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Mon, 28 Oct 2024 16:11:47 +0100 Subject: [PATCH 16/20] fix: incorrect pipeline compilation time report --- .../Renderer/Metal/MetalPipelineCache.cpp | 2 - .../Latte/Renderer/Metal/MetalPipelineCache.h | 1 - .../Renderer/Metal/MetalPipelineCompiler.cpp | 132 +----------------- .../Renderer/Metal/MetalPipelineCompiler.h | 8 -- 4 files changed, 1 insertion(+), 142 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 9e49959c9..bc77e00f3 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -35,9 +35,7 @@ MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_m MetalPipelineCache::~MetalPipelineCache() { for (auto& [key, value] : m_pipelineCache) - { value->release(); - } } MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index be26bdee0..b1307568d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -4,7 +4,6 @@ #include "util/helpers/ConcurrentQueue.h" #include "util/helpers/fspinlock.h" -// TODO: binary archives class MetalPipelineCache { public: diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index a8bce2913..6dd6087b1 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -386,7 +386,7 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool } auto end = std::chrono::high_resolution_clock::now(); - auto creationDuration = std::chrono::duration_cast(end - start).count(); + auto creationDuration = std::chrono::duration_cast(end - start).count(); if (error) { @@ -479,7 +479,6 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha layout->setStride(bufferStride); } - // TODO: don't always set the vertex descriptor? 
desc->setVertexDescriptor(vertexDescriptor); vertexDescriptor->release(); } @@ -487,62 +486,6 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); m_pipelineDescriptor = desc; - - //TryLoadBinaryArchive(); - - // Load binary - /* - if (m_binaryArchive) - { - NS::Object* binArchives[] = {m_binaryArchive}; - auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); - desc->setBinaryArchives(binaryArchives); - binaryArchives->release(); - } - */ - - /* - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Cached render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); - - // Pipeline wasn't found in the binary archive, we need to compile it - if (error) - { - desc->setBinaryArchives(nullptr); - - error->release(); - error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("New render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - else - { - // Save binary - if (m_binaryArchive) - { - NS::Error* error = nullptr; - m_binaryArchive->addRenderPipelineFunctions(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - } - } - desc->release(); - - return pipeline; - */ } void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) @@ -553,77 +496,4 @@ void 
MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShade SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); m_pipelineDescriptor = desc; - - //TryLoadBinaryArchive(); - - // Load binary - // TODO: no binary archives? :( - - /* - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Mesh pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); - desc->release(); - if (error) - { - cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - - return pipeline; - */ -} - -/* -void MetalPipelineCache::TryLoadBinaryArchive() -{ - if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) - return; - - // GPU name - const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String(); - std::string deviceName; - deviceName.assign(deviceName1); - - // Replace spaces with underscores - for (auto& c : deviceName) - { - if (c == ' ') - c = '_'; - } - - // OS version - auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion(); - - // Precompiled binaries cannot be shared between different devices or OS versions - const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); - const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, cacheFilename); - - // Create the directory if it doesn't exist - std::filesystem::create_directories(cachePath.parent_path()); - - m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); - - MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); - desc->setUrl(m_binaryArchiveURL); - - NS::Error* error = nullptr; - 
m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - desc->setUrl(nullptr); - - error->release(); - error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - desc->release(); } -*/ diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index e40675559..e3fab932a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -24,17 +24,9 @@ class MetalPipelineCompiler bool m_usesGeometryShader; bool m_rasterizationEnabled; - /* - std::map m_pipelineCache; - - NS::URL* m_binaryArchiveURL; - MTL::BinaryArchive* m_binaryArchive; - */ NS::Object* m_pipelineDescriptor; void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); - - //void TryLoadBinaryArchive(); }; From bca32c43d0023ae298fc1a3e6235a0f50f3932cb Mon Sep 17 00:00:00 2001 From: Samuliak Date: Mon, 28 Oct 2024 16:38:17 +0100 Subject: [PATCH 17/20] refactor the way pipelines are stored --- .../Renderer/Metal/MetalPipelineCache.cpp | 53 ++++++++----------- .../Latte/Renderer/Metal/MetalPipelineCache.h | 4 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 13 +++-- .../Renderer/Metal/MetalPipelineCompiler.h | 10 +++- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 11 ++-- 5 files changed, 45 insertions(+), 46 
deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index bc77e00f3..58c432f5f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -15,6 +15,7 @@ #include "HW/Latte/ISA/LatteReg.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" #include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" +#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" #include "Metal/MTLRenderPipeline.hpp" #include "util/helpers/helpers.h" #include "config/ActiveSettings.h" @@ -34,32 +35,32 @@ MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_m MetalPipelineCache::~MetalPipelineCache() { - for (auto& [key, value] : m_pipelineCache) - value->release(); + for (auto& [key, pipelineObj] : m_pipelineCache) + { + pipelineObj->m_pipeline->release(); + delete pipelineObj; + } } -MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); - auto it = m_pipelineCache.find(hash); - if (it != m_pipelineCache.end()) - return it->second; + PipelineObject*& pipelineObj = m_pipelineCache[hash]; + if 
(pipelineObj) + return pipelineObj; + + pipelineObj = new PipelineObject(); - MetalPipelineCompiler compiler(m_mtlr); + MetalPipelineCompiler compiler(m_mtlr, *pipelineObj); bool fbosMatch; compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); - bool attemptedCompilation = false; - MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true, attemptedCompilation); + compiler.Compile(false, true, true); // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache - if (pipeline && fbosMatch) + if (fbosMatch) AddCurrentStateToCache(hash); - // Place the pipeline to the cache if the compilation was at least attempted - if (attemptedCompilation) - m_pipelineCache.insert({hash, pipeline}); - - return pipeline; + return pipelineObj; } uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) @@ -358,32 +359,24 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader); - MTL::RenderPipelineState* pipeline = nullptr; + PipelineObject* pipelineObject = new PipelineObject(); + // compile { - MetalPipelineCompiler pp(m_mtlr); + MetalPipelineCompiler pp(m_mtlr, *pipelineObject); bool fbosMatch; pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr, fbosMatch); cemu_assert_debug(fbosMatch); - //{ - // s_spinlockSharedInternal.lock(); - // delete lcr; - // delete cachedPipeline; - // s_spinlockSharedInternal.unlock(); - // return; - //} - bool attemptedCompilation = false; - pipeline = pp.Compile(true, true, 
false, attemptedCompilation); - cemu_assert_debug(attemptedCompilation); + pp.Compile(true, true, false); // destroy pp early } - // on success, calculate pipeline hash and flag as present in cache - if (pipeline) + // on success, cache the pipeline + if (pipelineObject->m_pipeline) { uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); m_pipelineCacheLock.lock(); - m_pipelineCache[pipelineStateHash] = pipeline; + m_pipelineCache[pipelineStateHash] = pipelineObject; m_pipelineCacheLock.unlock(); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index b1307568d..f4f5e9635 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -12,7 +12,7 @@ class MetalPipelineCache MetalPipelineCache(class MetalRenderer* metalRenderer); ~MetalPipelineCache(); - MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); // Cache loading uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache @@ -27,7 +27,7 @@ class MetalPipelineCache private: class MetalRenderer* m_mtlr; - std::map m_pipelineCache; + std::map m_pipelineCache; FSpinlock 
m_pipelineCacheLock; std::thread* m_pipelineCacheStoreThread; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 6dd6087b1..611d190dd 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -327,7 +327,7 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); } -MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation) +bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) { if (forceCompile) { @@ -343,11 +343,11 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool { // fail early if some shader stages are not compiled if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled()) - return nullptr; + return false; if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled()) - return nullptr; + return false; if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled()) - return nullptr; + return false; } // Compile @@ -403,10 +403,9 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool g_compiling_pipelines++; } - // Inform the pipeline cache that compilation was at least attempted - attemptedCompilation = true; + m_pipelineObj.m_pipeline = pipeline; - return pipeline; + return true; } void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index e3fab932a..d762d8025 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -5,18 +5,24 @@ #include "Cafe/HW/Latte/ISA/LatteReg.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" +struct PipelineObject +{ + MTL::RenderPipelineState* m_pipeline = nullptr; +}; + class MetalPipelineCompiler { public: - MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {} ~MetalPipelineCompiler(); void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); - MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation); + bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay); private: class MetalRenderer* m_mtlr; + PipelineObject& m_pipelineObj; class RendererShaderMtl* m_vertexShaderMtl; class RendererShaderMtl* m_geometryShaderMtl; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index e560c2c33..b34747441 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -23,6 +23,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" #include "config/CemuConfig.h" #define IMGUI_IMPL_METAL_CPP @@ 
-1000,14 +1001,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto renderCommandEncoder = GetRenderCommandEncoder(); // Render pipeline state - MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); - if (!renderPipelineState) + PipelineObject* pipelineObj = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); + if (!pipelineObj->m_pipeline) return; - if (renderPipelineState != encoderState.m_renderPipelineState) + if (pipelineObj->m_pipeline != encoderState.m_renderPipelineState) { - renderCommandEncoder->setRenderPipelineState(renderPipelineState); - encoderState.m_renderPipelineState = renderPipelineState; + renderCommandEncoder->setRenderPipelineState(pipelineObj->m_pipeline); + encoderState.m_renderPipelineState = pipelineObj->m_pipeline; } // Depth stencil state From 4e3f94e87003d6061c2958f76288dfe4bae9efa9 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Mon, 28 Oct 2024 17:32:43 +0100 Subject: [PATCH 18/20] compile pipelines async --- .../Renderer/Metal/MetalPipelineCache.cpp | 96 ++++++++++++++++--- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 58c432f5f..73951cf8c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,26 +1,78 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" -#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" 
+#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" -#include "Cafe/HW/Latte/Core/LatteCachedFBO.h" #include "Cafe/HW/Latte/Common/RegisterSerializer.h" #include "Cafe/HW/Latte/Core/LatteShaderCache.h" #include "Cemu/FileCache/FileCache.h" #include "Common/precompiled.h" -#include "HW/Latte/Core/LatteShader.h" -#include "HW/Latte/ISA/LatteReg.h" -#include "HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" -#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" -#include "Metal/MTLRenderPipeline.hpp" +#include "Cafe/HW/Latte/Core/LatteShader.h" +#include "Cafe/HW/Latte/ISA/LatteReg.h" #include "util/helpers/helpers.h" #include "config/ActiveSettings.h" + #include +static bool g_compilePipelineThreadInit{false}; +static std::mutex g_compilePipelineMutex; +static std::condition_variable g_compilePipelineCondVar; +static std::queue g_compilePipelineRequests; + +static void compileThreadFunc(sint32 threadIndex) +{ + SetThreadName("compilePl"); + + // one thread runs at normal priority while the others run at lower priority + if(threadIndex != 0) + ; // TODO: set thread priority + + while (true) + { + std::unique_lock lock(g_compilePipelineMutex); + while (g_compilePipelineRequests.empty()) + g_compilePipelineCondVar.wait(lock); + + MetalPipelineCompiler* request = g_compilePipelineRequests.front(); + + g_compilePipelineRequests.pop(); + + lock.unlock(); + + request->Compile(true, false, true); + delete request; + } +} + +static void initCompileThread() +{ + uint32 numCompileThreads; + + uint32 cpuCoreCount = GetPhysicalCoreCount(); + if (cpuCoreCount <= 2) + numCompileThreads = 1; + else + numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3 + + numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8 + + for (uint32 i = 0; i < 
numCompileThreads; i++) + { + std::thread compileThread(compileThreadFunc, i); + compileThread.detach(); + } +} + +static void queuePipeline(MetalPipelineCompiler* v) +{ + std::unique_lock lock(g_compilePipelineMutex); + g_compilePipelineRequests.push(std::move(v)); + lock.unlock(); + g_compilePipelineCondVar.notify_one(); +} + MetalPipelineCache* g_mtlPipelineCache = nullptr; MetalPipelineCache& MetalPipelineCache::GetInstance() @@ -51,10 +103,30 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade pipelineObj = new PipelineObject(); - MetalPipelineCompiler compiler(m_mtlr, *pipelineObj); + MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj); bool fbosMatch; - compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); - compiler.Compile(false, true, true); + compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + + bool allowAsyncCompile = false; + // TODO: uncomment + if (GetConfig().async_compile) + allowAsyncCompile = true;//IsAsyncPipelineAllowed(indexCount); + + if (allowAsyncCompile) + { + if (!g_compilePipelineThreadInit) + { + initCompileThread(); + g_compilePipelineThreadInit = true; + } + + queuePipeline(compiler); + } + else + { + compiler->Compile(false, true, true); + delete compiler; + } // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache if (fbosMatch) From 7906733bfa9bd62f1fe7b1d0ece967a32adc9587 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Mon, 28 Oct 2024 19:02:44 +0100 Subject: [PATCH 19/20] don't compile certain pipelines async --- .../Renderer/Metal/MetalPipelineCache.cpp | 25 ++++++++++++++++--- .../Latte/Renderer/Metal/MetalPipelineCache.h | 3 ++- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 73951cf8c..101b6d688 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -73,6 +73,23 @@ static void queuePipeline(MetalPipelineCompiler* v) g_compilePipelineCondVar.notify_one(); } +// make a guess if a pipeline is not essential +// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics +bool IsAsyncPipelineAllowed(const MetalAttachmentsInfo& attachmentsInfo, Vector2i extend, uint32 indexCount) +{ + if (extend.x == 1600 && extend.y == 1600) + return false; // Splatoon ink mechanics use 1600x1600 R8 and R8G8 framebuffers, this resolution is rare enough that we can just blacklist it globally + + if (attachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT) + return true; // aggressive filter but seems to work well so far + + // small index count (3,4,5,6) is often associated with full-viewport quads (which are considered essential due to often being used to generate persistent textures) + if (indexCount <= 6) + return false; + + return true; +} + MetalPipelineCache* g_mtlPipelineCache = nullptr; MetalPipelineCache& MetalPipelineCache::GetInstance() @@ -94,7 +111,7 @@ MetalPipelineCache::~MetalPipelineCache() } } -PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const 
MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr) { uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); PipelineObject*& pipelineObj = m_pipelineCache[hash]; @@ -108,9 +125,8 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); bool allowAsyncCompile = false; - // TODO: uncomment if (GetConfig().async_compile) - allowAsyncCompile = true;//IsAsyncPipelineAllowed(indexCount); + allowAsyncCompile = IsAsyncPipelineAllowed(activeAttachmentsInfo, extend, indexCount); if (allowAsyncCompile) { @@ -124,7 +140,8 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade } else { - compiler->Compile(false, true, true); + // Also force compile to ensure that the pipeline is ready + cemu_assert_debug(compiler->Compile(true, true, true)); delete compiler; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index f4f5e9635..d49ec6a25 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -3,6 +3,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" #include "util/helpers/ConcurrentQueue.h" #include "util/helpers/fspinlock.h" +#include "util/math/vector2.h" class MetalPipelineCache { @@ -12,7 +13,7 @@ class MetalPipelineCache MetalPipelineCache(class MetalRenderer* metalRenderer); ~MetalPipelineCache(); - PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const 
class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr); // Cache loading uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index b34747441..dc4244ec0 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1001,7 +1001,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto renderCommandEncoder = GetRenderCommandEncoder(); // Render pipeline state - PipelineObject* pipelineObj = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); + PipelineObject* pipelineObj = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, m_state.m_activeFBO.m_fbo->m_size, count, LatteGPUState.contextNew); if (!pipelineObj->m_pipeline) return; From 85db0dc4685bf4b4d9bb761e4232b5e32cb577c1 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 29 Oct 2024 07:44:47 +0100 Subject: [PATCH 20/20] cache all pipelines --- .../Renderer/Metal/MetalPipelineCache.cpp | 32 +++++++++++++------ .../Latte/Renderer/Metal/MetalPipelineCache.h | 2 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 18 +++++------ 
.../Renderer/Metal/MetalPipelineCompiler.h | 6 ++-- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 101b6d688..d49060fbd 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -11,6 +11,7 @@ #include "Common/precompiled.h" #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/ISA/LatteReg.h" +#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" #include "util/helpers/helpers.h" #include "config/ActiveSettings.h" @@ -121,8 +122,7 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade pipelineObj = new PipelineObject(); MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj); - bool fbosMatch; - compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); bool allowAsyncCompile = false; if (GetConfig().async_compile) @@ -145,9 +145,8 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade delete compiler; } - // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache - if (fbosMatch) - AddCurrentStateToCache(hash); + // Save to cache + AddCurrentStateToCache(hash, lastUsedAttachmentsInfo); return pipelineObj; } @@ -380,6 +379,8 @@ struct CachedPipeline ShaderHash gsHash; ShaderHash psHash; + MetalAttachmentsInfo lastUsedAttachmentsInfo; + Latte::GPUCompactedRegisterState gpuState; }; @@ -453,9 +454,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) // compile { MetalPipelineCompiler pp(m_mtlr, *pipelineObject); - bool fbosMatch; - pp.InitFromState(vertexShader->compatibleFetchShader, 
vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr, fbosMatch); - cemu_assert_debug(fbosMatch); + pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr); pp.Compile(true, true, false); // destroy pp early } @@ -463,7 +462,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) // on success, cache the pipeline if (pipelineObject->m_pipeline) { - uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); + uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr); m_pipelineCacheLock.lock(); m_pipelineCache[pipelineStateHash] = pipelineObject; m_pipelineCacheLock.unlock(); @@ -478,7 +477,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) ConcurrentQueue g_mtlPipelineCachingQueue; -void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash) +void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash, const MetalAttachmentsInfo& lastUsedAttachmentsInfo) { if (!m_pipelineCacheStoreThread) { @@ -499,6 +498,7 @@ void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash) job->gsHash.set(gs->baseHash, gs->auxHash); if (ps) job->psHash.set(ps->baseHash, ps->auxHash); + job->lastUsedAttachmentsInfo = lastUsedAttachmentsInfo; Latte::StoreGPURegisterState(LatteGPUState.contextNew, job->gpuState); // queue job g_mtlPipelineCachingQueue.push(job); @@ -530,7 +530,13 @@ bool MetalPipelineCache::SerializePipeline(MemStreamWriter& memWriter, CachedPip memWriter.writeBE(cachedPipeline.psHash.baseHash); memWriter.writeBE(cachedPipeline.psHash.auxHash); } + + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + 
memWriter.writeBE((uint16)cachedPipeline.lastUsedAttachmentsInfo.colorFormats[i]); + memWriter.writeBE((uint16)cachedPipeline.lastUsedAttachmentsInfo.depthFormat); + Latte::SerializeRegisterState(cachedPipeline.gpuState, memWriter); + return true; } @@ -562,12 +568,18 @@ bool MetalPipelineCache::DeserializePipeline(MemStreamReader& memReader, CachedP uint64 auxHash = memReader.readBE(); cachedPipeline.psHash.set(baseHash, auxHash); } + + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + cachedPipeline.lastUsedAttachmentsInfo.colorFormats[i] = (Latte::E_GX2SURFFMT)memReader.readBE(); + cachedPipeline.lastUsedAttachmentsInfo.depthFormat = (Latte::E_GX2SURFFMT)memReader.readBE(); + // deserialize GPU state if (!Latte::DeserializeRegisterState(cachedPipeline.gpuState, memReader)) { return false; } cemu_assert_debug(!memReader.hasError()); + return true; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index d49ec6a25..270c2db72 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -41,7 +41,7 @@ class MetalPipelineCache static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); - void AddCurrentStateToCache(uint64 pipelineStateHash); + void AddCurrentStateToCache(uint64 pipelineStateHash, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo); // pipeline serialization for file bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index 
611d190dd..9d74e2d92 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -190,7 +190,7 @@ extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template -void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr, bool& fbosMatch) +void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr) { // TODO: check if the pixel shader is valid as well? if (!rasterizationEnabled/* || !pixelShaderMtl*/) @@ -200,7 +200,6 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn } // Color attachments - fbosMatch = true; const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); @@ -218,7 +217,6 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT) { colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); - fbosMatch = false; continue; } @@ -288,7 +286,7 @@ MetalPipelineCompiler::~MetalPipelineCompiler() m_pipelineDescriptor->release(); } -void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) +void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, 
const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { // Check if the pipeline uses a geometry shader const LattePrimitiveMode primitiveMode = static_cast(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE()); @@ -322,9 +320,9 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c m_pixelShaderMtl = static_cast(pixelShader->shader); if (m_usesGeometryShader) - InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); else - InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); } bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) @@ -408,7 +406,7 @@ bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool return true; } -void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) +void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); @@ -482,17 +480,17 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha vertexDescriptor->release(); } - SetFragmentState(desc, 
lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr); m_pipelineDescriptor = desc; } -void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) +void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) { // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); + SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr); m_pipelineDescriptor = desc; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index d762d8025..5006ed595 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -16,7 +16,7 @@ class MetalPipelineCompiler MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {} ~MetalPipelineCompiler(); - void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); + void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* 
vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay); @@ -32,7 +32,7 @@ class MetalPipelineCompiler NS::Object* m_pipelineDescriptor; - void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); + void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); - void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); + void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); };