Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipeline cache #7

Merged
merged 21 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/Cafe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ if(APPLE)
endif()

if(ENABLE_METAL)
# TODO: sort alphabetically
target_sources(CemuCafe PRIVATE
HW/Latte/Renderer/Metal/MetalRenderer.cpp
HW/Latte/Renderer/Metal/MetalRenderer.h
Expand All @@ -555,11 +556,15 @@ if(ENABLE_METAL)
HW/Latte/Renderer/Metal/RendererShaderMtl.h
HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
HW/Latte/Renderer/Metal/CachedFBOMtl.h
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
HW/Latte/Renderer/Metal/MetalBufferAllocator.h
HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
HW/Latte/Renderer/Metal/MetalMemoryManager.h
HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
HW/Latte/Renderer/Metal/MetalPipelineCache.h
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
Expand Down
35 changes: 20 additions & 15 deletions src/Cafe/HW/Latte/Core/LatteShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,9 @@ void LatteShader_free(LatteDecompilerShader* shader)
delete shader;
}

// both vertex and geometry/pixel shader depend on PS inputs
// we prepare the PS import info in advance
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters)
{
// PS control
// PS control
uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0];
uint32 spi0_positionEnable = (psControl0 >> 8) & 1;
uint32 spi0_positionCentroid = (psControl0 >> 9) & 1;
Expand Down Expand Up @@ -242,12 +240,12 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
{
key += std::rotr<uint64>(spi0_paramGen, 7);
key += std::rotr<uint64>(spi0_paramGenAddr, 3);
_activePSImportTable.paramGen = spi0_paramGen;
_activePSImportTable.paramGenGPR = spi0_paramGenAddr;
psInputTable->paramGen = spi0_paramGen;
psInputTable->paramGenGPR = spi0_paramGenAddr;
}
else
{
_activePSImportTable.paramGen = 0;
psInputTable->paramGen = 0;
}

// semantic imports from vertex shader
Expand Down Expand Up @@ -281,9 +279,9 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
key = std::rotl<uint64>(key, 7);
if (spi0_positionEnable && f == spi0_positionAddr)
{
_activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
_activePSImportTable.import[f].isFlat = false;
_activePSImportTable.import[f].isNoPerspective = false;
psInputTable->import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
psInputTable->import[f].isFlat = false;
psInputTable->import[f].isNoPerspective = false;
key += (uint64)0x33;
}
else
Expand All @@ -296,13 +294,20 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7));
#endif

_activePSImportTable.import[f].semanticId = psSemanticId;
_activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0;
_activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
psInputTable->import[f].semanticId = psSemanticId;
psInputTable->import[f].isFlat = (psInputControl&(1 << 10)) != 0;
psInputTable->import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
}
}
_activePSImportTable.key = key;
_activePSImportTable.count = numPSInputs;
psInputTable->key = key;
psInputTable->count = numPSInputs;
}

// both vertex and geometry/pixel shader depend on PS inputs
// we prepare the PS import info in advance
// Refreshes _activePSImportTable from the given context registers by
// delegating to LatteShader_CreatePSInputTable, which fills the table it is handed.
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
{
LatteShader_CreatePSInputTable(&_activePSImportTable, contextRegisters);
}

void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync)
Expand Down
3 changes: 2 additions & 1 deletion src/Cafe/HW/Latte/Core/LatteShader.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ struct LatteShaderPSInputTable
}
};

void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters);
void LatteShader_UpdatePSInputs(uint32* contextRegisters);
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable();

Expand Down Expand Up @@ -126,4 +127,4 @@ void LatteShaderCache_writeSeparableGeometryShader(uint64 shaderBaseHash, uint64
void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader);

// todo - refactor this
sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType);
sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType);
53 changes: 22 additions & 31 deletions src/Cafe/HW/Latte/Core/LatteShaderCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ FileCache* s_shaderCacheGeneric = nullptr; // contains hardware and version inde
#define SHADER_CACHE_TYPE_PIXEL (2)

bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize);
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId);
void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId);
bool LatteShaderCache_updatePipelineLoadingProgress();
void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateFunc, bool isPipelines);

Expand Down Expand Up @@ -160,18 +160,11 @@ bool LoadTGAFile(const std::vector<uint8>& buffer, TGAFILE *tgaFile)
// Signals the end of shader cache loading to the shader backend that matches
// the active renderer type (g_renderer->GetType()).
// Renderer types other than Vulkan, OpenGL and Metal fall through without any call.
void LatteShaderCache_finish()
{
if (g_renderer->GetType() == RendererAPI::Vulkan)
{
RendererShaderVk::ShaderCacheLoading_end();
}
else if (g_renderer->GetType() == RendererAPI::OpenGL)
{
RendererShaderGL::ShaderCacheLoading_end();
}
else if (g_renderer->GetType() == RendererAPI::Metal)
{
// Metal notifies both the shader backend and the pipeline cache
RendererShaderMtl::ShaderCacheLoading_end();
MetalPipelineCache::ShaderCacheLoading_end();
}
}

uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId)
Expand Down Expand Up @@ -251,18 +244,11 @@ void LatteShaderCache_Load()
fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec);
// initialize renderer specific caches
if (g_renderer->GetType() == RendererAPI::Vulkan)
{
RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId);
}
else if (g_renderer->GetType() == RendererAPI::OpenGL)
{
RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId);
}
else if (g_renderer->GetType() == RendererAPI::Metal)
{
RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId);
MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId);
}
// get cache file name
const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0
Expand Down Expand Up @@ -361,9 +347,9 @@ void LatteShaderCache_Load()
cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad);
#endif
LatteShaderCache_finish();
// if Vulkan then also load pipeline cache
if (g_renderer->GetType() == RendererAPI::Vulkan)
LatteShaderCache_LoadVulkanPipelineCache(cacheTitleId);
// if Vulkan or Metal then also load pipeline cache
if (g_renderer->GetType() == RendererAPI::Vulkan || g_renderer->GetType() == RendererAPI::Metal)
LatteShaderCache_LoadPipelineCache(cacheTitleId);


g_renderer->BeginFrame(true);
Expand Down Expand Up @@ -518,21 +504,31 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
}
}

void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId)
void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId)
{
auto& pipelineCache = VulkanPipelineStableCache::GetInstance();
g_shaderCacheLoaderState.pipelineFileCount = pipelineCache.BeginLoading(cacheTitleId);
if (g_renderer->GetType() == RendererAPI::Vulkan)
g_shaderCacheLoaderState.pipelineFileCount = VulkanPipelineStableCache::GetInstance().BeginLoading(cacheTitleId);
else if (g_renderer->GetType() == RendererAPI::Metal)
g_shaderCacheLoaderState.pipelineFileCount = MetalPipelineCache::GetInstance().BeginLoading(cacheTitleId);
g_shaderCacheLoaderState.loadedPipelines = 0;
LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true);
pipelineCache.EndLoading();
if (g_renderer->GetType() == RendererAPI::Vulkan)
VulkanPipelineStableCache::GetInstance().EndLoading();
else if (g_renderer->GetType() == RendererAPI::Metal)
MetalPipelineCache::GetInstance().EndLoading();
if(Latte_GetStopSignal())
LatteThread_Exit();
}

bool LatteShaderCache_updatePipelineLoadingProgress()
{
uint32 pipelinesMissingShaders = 0;
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
if (g_renderer->GetType() == RendererAPI::Vulkan)
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
else if (g_renderer->GetType() == RendererAPI::Metal)
return MetalPipelineCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);

return false;
}

uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType)
Expand Down Expand Up @@ -791,22 +787,17 @@ void LatteShaderCache_Close()
s_shaderCacheGeneric = nullptr;
}
if (g_renderer->GetType() == RendererAPI::Vulkan)
{
RendererShaderVk::ShaderCacheLoading_Close();
}
else if (g_renderer->GetType() == RendererAPI::OpenGL)
{
RendererShaderGL::ShaderCacheLoading_Close();
}
else if (g_renderer->GetType() == RendererAPI::Metal)
{
RendererShaderMtl::ShaderCacheLoading_Close();
MetalPipelineCache::ShaderCacheLoading_Close();
}

// if Vulkan then also close pipeline cache
// if Vulkan or Metal then also close pipeline cache
if (g_renderer->GetType() == RendererAPI::Vulkan)
VulkanPipelineStableCache::GetInstance().Close();
else if (g_renderer->GetType() == RendererAPI::Metal)
MetalPipelineCache::GetInstance().Close();
}

#include <wx/msgdlg.h>
Expand Down
5 changes: 1 addition & 4 deletions src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
}
else if (textureType == MTL::TextureTypeCube)
{
// Do notjing
// Do nothing
}
else if (textureType == MTL::TextureTypeCubeArray)
{
Expand All @@ -81,13 +81,10 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM

MTL::TextureUsage usage = MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView;
if (!Latte::IsCompressedFormat(format))
{
usage |= MTL::TextureUsageRenderTarget;
}
desc->setUsage(usage);

m_texture = mtlRenderer->GetDevice()->newTexture(desc);

desc->release();
}

Expand Down
48 changes: 48 additions & 0 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"

// Builds the attachment description from an existing cached framebuffer:
// copies the format of every bound color texture view and, if a depth texture
// is bound, its format and the FBO's stencil flag. Unbound slots keep their
// in-class default values.
MetalAttachmentsInfo::MetalAttachmentsInfo(class CachedFBOMtl* fbo)
{
	// Color attachments
	for (uint8 slot = 0; slot < LATTE_NUM_COLOR_TARGET; slot++)
	{
		auto colorView = static_cast<LatteTextureViewMtl*>(fbo->colorBuffer[slot].texture);
		if (colorView)
			colorFormats[slot] = colorView->format;
	}

	// Depth stencil attachment
	auto depthView = static_cast<LatteTextureViewMtl*>(fbo->depthBuffer.texture);
	if (depthView)
	{
		depthFormat = depthView->format;
		hasStencil = fbo->depthBuffer.hasStencil;
	}
}

// Builds the attachment description directly from GPU register state instead
// of a framebuffer object.
//   lcr         - context registers queried through LatteMRT for the active
//                 buffer masks and the color/depth formats
//   pixelShader - pixel shader passed to LatteMRT::GetActiveColorBufferMask
// Color slots whose mask bit is clear, and the depth members when the depth
// mask is false, keep their in-class default values.
MetalAttachmentsInfo::MetalAttachmentsInfo(const LatteContextRegister& lcr, const LatteDecompilerShader* pixelShader)
{
	const uint8 cbMask = LatteMRT::GetActiveColorBufferMask(pixelShader, lcr);
	const bool dbMask = LatteMRT::GetActiveDepthBufferMask(lcr);

	// Color attachments
	// Bounded by the size of colorFormats (LATTE_NUM_COLOR_TARGET) rather than a
	// literal, so the loop can never index past the array and stays in sync with
	// the FBO-based constructor.
	for (int i = 0; i < LATTE_NUM_COLOR_TARGET; ++i)
	{
		if ((cbMask & (1 << i)) == 0)
			continue;

		colorFormats[i] = LatteMRT::GetColorBufferFormat(i, lcr);
	}

	// Depth stencil attachment
	if (dbMask)
	{
		const Latte::E_GX2SURFFMT format = LatteMRT::GetDepthBufferFormat(lcr);
		depthFormat = format;
		// Stencil presence is derived from the format's Metal pixel format info
		hasStencil = GetMtlPixelFormatInfo(format, true).hasStencil;
	}
}
15 changes: 15 additions & 0 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"

// Plain description of a render target configuration: the surface format of
// each color attachment slot, the depth format, and whether the depth
// attachment carries stencil.
class MetalAttachmentsInfo
{
public:
// Leaves all members at their in-class defaults (no attachments described).
MetalAttachmentsInfo() = default;
// Fills the members from the texture views bound to an existing cached FBO.
MetalAttachmentsInfo(class CachedFBOMtl* fbo);
// Fills the members from register state (lcr) and the given pixel shader.
MetalAttachmentsInfo(const LatteContextRegister& lcr, const class LatteDecompilerShader* pixelShader);

// One format per color target slot.
// NOTE(review): the initializer names INVALID_FORMAT only for element 0; the
// remaining elements are value-initialized (zero). That equals INVALID_FORMAT
// only if the enumerator's value is 0 — TODO confirm.
Latte::E_GX2SURFFMT colorFormats[LATTE_NUM_COLOR_TARGET] = {Latte::E_GX2SURFFMT::INVALID_FORMAT};
// Depth attachment format; stays INVALID_FORMAT unless a constructor sets it.
Latte::E_GX2SURFFMT depthFormat = Latte::E_GX2SURFFMT::INVALID_FORMAT;
// Whether the depth attachment has a stencil component.
bool hasStencil = false;
};
Loading
Loading