From 1fa9ce126b0f5b24707b4ca79111a964827cd787 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 23 Jul 2024 10:54:01 +0200 Subject: [PATCH 01/14] add: period at the end of a sentence --- src/miniaudio.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/miniaudio.cpp b/src/miniaudio.cpp index e42fea683..a61979e0a 100644 --- a/src/miniaudio.cpp +++ b/src/miniaudio.cpp @@ -1,5 +1,5 @@ // We do not need the ability to be able to encode or decode audio files for the time being -// So we disable said functionality to make the executable smaller +// So we disable said functionality to make the executable smaller. #define MA_NO_DECODING #define MA_NO_ENCODING #define MINIAUDIO_IMPLEMENTATION From 98b5d560215d5899d12bea2ff0f161263bc71d8b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 10:06:56 +0200 Subject: [PATCH 02/14] metal: add all the files --- .gitignore | 4 + CMakeLists.txt | 86 +- .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 75 ++ .../renderer_mtl/mtl_depth_stencil_cache.hpp | 86 ++ .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 174 ++++ include/renderer_mtl/mtl_render_target.hpp | 92 +++ include/renderer_mtl/mtl_texture.hpp | 77 ++ .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 80 ++ include/renderer_mtl/objc_helper.hpp | 16 + include/renderer_mtl/pica_to_mtl.hpp | 155 ++++ include/renderer_mtl/renderer_mtl.hpp | 189 +++++ src/core/renderer_mtl/metal_cpp_impl.cpp | 6 + src/core/renderer_mtl/mtl_etc1.cpp | 124 +++ src/core/renderer_mtl/mtl_texture.cpp | 312 +++++++ src/core/renderer_mtl/objc_helper.mm | 12 + src/core/renderer_mtl/renderer_mtl.cpp | 774 +++++++++++++++++ .../metal_copy_to_lut_texture.metal | 9 + src/host_shaders/metal_shaders.metal | 782 ++++++++++++++++++ 18 files changed, 3041 insertions(+), 12 deletions(-) create mode 100644 include/renderer_mtl/mtl_blit_pipeline_cache.hpp create mode 100644 include/renderer_mtl/mtl_depth_stencil_cache.hpp create mode 100644 include/renderer_mtl/mtl_draw_pipeline_cache.hpp create mode 100644 include/renderer_mtl/mtl_render_target.hpp create mode 100644 include/renderer_mtl/mtl_texture.hpp create mode 100644 include/renderer_mtl/mtl_vertex_buffer_cache.hpp create mode 100644 include/renderer_mtl/objc_helper.hpp create mode 100644 include/renderer_mtl/pica_to_mtl.hpp create mode 100644 include/renderer_mtl/renderer_mtl.hpp create mode 100644 src/core/renderer_mtl/metal_cpp_impl.cpp create mode 100644 src/core/renderer_mtl/mtl_etc1.cpp create mode 100644 src/core/renderer_mtl/mtl_texture.cpp create mode 100644 src/core/renderer_mtl/objc_helper.mm create mode 100644 src/core/renderer_mtl/renderer_mtl.cpp create mode 100644 src/host_shaders/metal_copy_to_lut_texture.metal create mode 100644 src/host_shaders/metal_shaders.metal diff --git a/.gitignore b/.gitignore index 528462ad6..817786a36 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,9 @@ fb.bat *.elf *.smdh +# Compiled Metal shader files +*.ir +*.metallib + config.toml CMakeSettings.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 2865a3f8e..31fdd9f27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,16 +26,17 @@ endif() if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") -endif() +endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") -endif() +endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON) +option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_TESTS "Compile unit-tests" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) @@ -55,11 +56,6 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) - # Disable stack buffer overflow checks in user builds - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") -endif() - add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -240,7 +236,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp - include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp + include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp @@ -251,7 +247,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp - include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp + include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp @@ -260,7 +256,6 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp - include/sdl_gyro.hpp ) cmrc_add_resource_library( @@ -418,8 +413,75 @@ if(ENABLE_VULKAN) target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk) endif() +if(ENABLE_METAL AND APPLE) + set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp + include/renderer_mtl/mtl_depth_stencil_cache.hpp + include/renderer_mtl/mtl_blit_pipeline_cache.hpp + include/renderer_mtl/mtl_draw_pipeline_cache.hpp + include/renderer_mtl/mtl_render_target.hpp + include/renderer_mtl/mtl_texture.hpp + include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/pica_to_mtl.hpp + include/renderer_mtl/objc_helper.hpp + ) + + set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp + src/core/renderer_mtl/renderer_mtl.cpp + src/core/renderer_mtl/mtl_texture.cpp + src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/objc_helper.mm + src/host_shaders/metal_shaders.metal + src/host_shaders/metal_copy_to_lut_texture.metal + ) + + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) + source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES}) + + set(RENDERER_MTL_HOST_SHADERS_SOURCES) + function (add_metal_shader SHADER) + set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal") + set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir") + set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib") + # TODO: only include sources in debug builds + add_custom_command( + OUTPUT ${SHADER_IR} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} + DEPENDS ${SHADER_SOURCE} + VERBATIM) + add_custom_command( + OUTPUT ${SHADER_METALLIB} + COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR} + DEPENDS ${SHADER_IR} + VERBATIM) + set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB}) + endfunction() + + add_metal_shader(metal_shaders) + add_metal_shader(metal_copy_to_lut_texture) + + add_custom_target( + compile_msl_shaders + DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES} + ) + + cmrc_add_resource_library( + resources_renderer_mtl + NAMESPACE RendererMTL + WHENCE "src/host_shaders/" + "src/host_shaders/metal_shaders.metallib" + "src/host_shaders/metal_copy_to_lut_texture.metallib" + ) + add_dependencies(resources_renderer_mtl compile_msl_shaders) + + target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES}) + target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1") + target_include_directories(AlberCore PRIVATE third_party/metal-cpp) + # TODO: check if all of them are needed + target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl) +endif() + source_group("Header Files\\Core" FILES ${HEADER_FILES}) -set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} +set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${AUDIO_SOURCE_FILES} ${HEADER_FILES} ${FRONTEND_HEADER_FILES}) target_sources(AlberCore PRIVATE ${ALL_SOURCES}) @@ -508,7 +570,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") + set(FRONTEND_HEADER_FILES "") endif() target_link_libraries(Alber PRIVATE AlberCore) diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp new file mode 100644 index 000000000..264226353 --- /dev/null +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -0,0 +1,75 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; +}; + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class BlitPipelineCache { +public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } + +private: + std::map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp new file mode 100644 index 000000000..90721b700 --- /dev/null +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -0,0 +1,86 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct DepthStencilHash { + bool depthStencilWrite; + u8 depthFunc; + u32 stencilConfig; + u16 stencilOpConfig; +}; + +class DepthStencilCache { +public: + DepthStencilCache() = default; + + ~DepthStencilCache() { + reset(); + } + + void set(MTL::Device* dev) { + device = dev; + } + + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + + const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); + + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } + + depthStencilState = device->newDepthStencilState(desc); + + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } + + return depthStencilState; + } + + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } + +private: + std::map depthStencilCache; + + MTL::Device* device; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp new file mode 100644 index 000000000..8bfea6366 --- /dev/null +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -0,0 +1,174 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct DrawFragmentFunctionHash { + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u32 lightingConfig1; // 32 bits (TODO: check this) + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) +}; + +//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { +// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == r.lightingNumLights) && +// (l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl)); +//} + +inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; + + return false; +} + +struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits + + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits + + DrawFragmentFunctionHash fragHash; +}; + +//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { +// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && +// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && +// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); +//} + +inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; + + return false; +} + +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +#define VERTEX_BUFFER_BINDING_INDEX 30 + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class DrawPipelineCache { +public: + DrawPipelineCache() = default; + + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } + + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } + + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + //u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); + //u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; + auto& pipeline = pipelineCache[hash]; + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } + + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + desc->setVertexDescriptor(vertexDescriptor); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } + +private: + std::map pipelineCache; + std::map fragmentFunctionCache; + + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp new file mode 100644 index 000000000..73be45f4d --- /dev/null +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -0,0 +1,92 @@ +#pragma once +#include +#include +#include +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "pica_to_mtl.hpp" +#include "objc_helper.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + +template +struct RenderTarget { + MTL::Device* device; + + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + MTL::Texture* texture = nullptr; + + RenderTarget() : valid(false) {} + + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && + size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString(std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); + descriptor->release(); + } + + void free() { + valid = false; + + if (texture) { + texture->release(); + } + } + + u64 sizeInBytes() { + return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); + } +}; + +typedef RenderTarget ColorRenderTarget; +typedef RenderTarget DepthStencilRenderTarget; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp new file mode 100644 index 000000000..590132bd1 --- /dev/null +++ b/include/renderer_mtl/mtl_texture.hpp @@ -0,0 +1,77 @@ +#pragma once +#include +#include +#include +#include "PICA/regs.hpp" +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "renderer_mtl/pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + +struct Texture { + MTL::Device* device; + + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; + + Texture() : valid(false) {} + + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && + size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); + + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + + // Returns the format of this texture as a string + std::string_view formatToString() { + return PICA::textureFormatToString(format); + } + + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp new file mode 100644 index 000000000..1760cdfa5 --- /dev/null +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -0,0 +1,80 @@ +#pragma once + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BufferHandle { + MTL::Buffer* buffer; + size_t offset; +}; + +// 64MB buffer for caching vertex data +#define CACHE_BUFFER_SIZE 64 * 1024 * 1024 + +class VertexBufferCache { +public: + VertexBufferCache() = default; + + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } + + void set(MTL::Device* dev) { + device = dev; + create(); + } + + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } + + BufferHandle get(const void* data, size_t size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + + return BufferHandle{newBuffer, 0}; + } + + // Copy the data into the buffer + memcpy((char*)buffer->contents() + ptr, data, size); + + size_t oldPtr = ptr; + ptr += size; + + return BufferHandle{buffer, oldPtr}; + } + + void reset() { + endFrame(); + if (buffer) { + buffer->release(); + create(); + } + } + +private: + MTL::Buffer* buffer = nullptr; + size_t ptr = 0; + std::vector additionalAllocations; + + MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } +}; + +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp new file mode 100644 index 000000000..91756d241 --- /dev/null +++ b/include/renderer_mtl/objc_helper.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include + +#include + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size); + +} // namespace Metal + +// Cast from std::string to NS::String* +inline NS::String* toNSString(const std::string& str) { + return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); +} diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp new file mode 100644 index 000000000..de76dc3b5 --- /dev/null +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -0,0 +1,155 @@ +#pragma once + +#include +#include "PICA/regs.hpp" + +namespace PICA { + +struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; +}; + +constexpr PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 +}; + +inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { + return pixelFormatInfos[static_cast(format)]; +} + +inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? + case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } +} + +inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } +} + +inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: panic("Unknown compare function %u", func); + } + + return MTL::CompareFunctionAlways; +} + +inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: panic("Unknown blend operation %u", op); + } + + return MTL::BlendOperationAdd; +} + +inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5: return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: panic("Unknown blend factor %u", factor); + } + + return MTL::BlendFactorOne; +} + +inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: panic("Unknown stencil operation %u", op); + } + + return MTL::StencilOperationKeep; +} + +inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + //Helpers::warn("Geometry primitives are not yet, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + } +} + +inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: panic("Unknown sampler address mode %u", addrMode); + } + + return MTL::SamplerAddressModeClampToEdge; +} + +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp new file mode 100644 index 000000000..9ba0937a3 --- /dev/null +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -0,0 +1,189 @@ +#include +#include + +#include "renderer.hpp" +#include "mtl_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_blit_pipeline_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_vertex_buffer_cache.hpp" +// HACK: use the OpenGL cache +#include "../renderer_gl/surface_cache.hpp" + +class GPU; + +struct Color4 { + float r, g, b, a; +}; + +class RendererMTL final : public Renderer { + public: + RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); + ~RendererMTL() override; + + void reset() override; + void display() override; + void initGraphicsContext(SDL_Window* window) override; + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; + void drawVertices(PICA::PrimType primType, std::span vertices) override; + void screenshot(const std::string& name) override; + void deinitGraphicsContext() override; + +#ifdef PANDA3DS_FRONTEND_QT + virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} +#endif + + private: + CA::MetalLayer* metalLayer; + + MTL::Device* device; + MTL::CommandQueue* commandQueue; + + // Libraries + MTL::Library* library; + + // Caches + SurfaceCache colorRenderTargetCache; + SurfaceCache depthStencilRenderTargetCache; + SurfaceCache textureCache; + Metal::BlitPipelineCache blitPipelineCache; + Metal::DrawPipelineCache drawPipelineCache; + Metal::DepthStencilCache depthStencilCache; + Metal::VertexBufferCache vertexBufferCache; + + // Objects + MTL::SamplerState* nearestSampler; + MTL::SamplerState* linearSampler; + MTL::Texture* lutTexture; + MTL::DepthStencilState* defaultDepthStencilState; + + // Pipelines + MTL::RenderPipelineState* displayPipeline; + MTL::RenderPipelineState* copyToLutTexturePipeline; + + // Clears + std::map colorClearOps; + std::map depthClearOps; + std::map stencilClearOps; + + // Active state + MTL::CommandBuffer* commandBuffer = nullptr; + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + MTL::Texture* lastColorTexture = nullptr; + MTL::Texture* lastDepthTexture = nullptr; + + // Debug + std::string nextRenderPassName; + + void createCommandBufferIfNeeded() { + if (!commandBuffer) { + commandBuffer = commandQueue->commandBuffer(); + } + } + + void endRenderPass() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } + } + + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); + } + + void commitCommandBuffer() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } + + template + inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } + + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + setClearData(attachment, clearData); + attachment->setLoadAction(MTL::LoadActionClear); + attachment->setStoreAction(MTL::StoreActionStore); + + if (beginRenderPass) { + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + } + } + + template + inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } + + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } + + return false; + } + + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, colorClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, [](auto attachment, auto& color) { + attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); + }); + } + + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, depthClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, [](auto attachment, auto& depth) { + attachment->setClearDepth(depth); + }); + } + + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, stencilClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, [](auto attachment, auto& stencil) { + attachment->setClearStencil(stencil); + }); + } + + std::optional getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); + Metal::DepthStencilRenderTarget& getDepthRenderTarget(); + Metal::Texture& getTexture(Metal::Texture& tex); + void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void updateLightingLUT(MTL::RenderCommandEncoder* encoder); + void updateFogLUT(MTL::RenderCommandEncoder* encoder); + void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); +}; diff --git a/src/core/renderer_mtl/metal_cpp_impl.cpp b/src/core/renderer_mtl/metal_cpp_impl.cpp new file mode 100644 index 000000000..7fa7137b9 --- /dev/null +++ b/src/core/renderer_mtl/metal_cpp_impl.cpp @@ -0,0 +1,6 @@ +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include +#include +#include diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp new file mode 100644 index 000000000..a414df3c9 --- /dev/null +++ b/src/core/renderer_mtl/mtl_etc1.cpp @@ -0,0 +1,124 @@ +#include +#include "colour.hpp" +#include "renderer_mtl/renderer_mtl.hpp" +#include "renderer_mtl/mtl_texture.hpp" + +using namespace Helpers; + +namespace Metal { + +static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); +} + +u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) + offs >>= 1; + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? + + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } + else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); +} + +u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + { 2, 8 }, + { 5, 17 }, + { 9, 29 }, + { 13, 42 }, + { 18, 60 }, + { 24, 80 }, + { 33, 106 }, + { 47, 183 }, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) + std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp new file mode 100644 index 000000000..b61c55021 --- /dev/null +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -0,0 +1,312 @@ +#include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/objc_helper.hpp" +#include "colour.hpp" +#include + +using namespace Helpers; + +namespace Metal { + +void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); + descriptor->release(); + + setNewConfig(config); +} + +// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on +void Texture::setNewConfig(u32 cfg) { + config = cfg; + + if (sampler) { + sampler->release(); + } + + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); + + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); +} + +void Texture::free() { + valid = false; + + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } +} + +u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); + + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; + + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; + + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: + return pixelCount * 2; + + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: + return pixelCount; + + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: + return pixelCount / 2; + + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; + return tileCount * tileSize; + } + + default: + Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } +} + +// u and v are the UVs of the relevant texel +// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here +// https://en.wikipedia.org/wiki/Z-order_curve +// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel +// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 +// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg +u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 }; + static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 }; + + return xOffsets[u & 7] + yOffsets[v & 7]; +} + +// Get the byte offset of texel (u, v) in the texture +u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; +} + +// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte +u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; +} + +u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + // A8 + return alpha; + } + + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; + + // A8 + return alpha; + } + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; + + // RG8 + return (g << 8) | r; + } + + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } + + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } + + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); + + // B5G6R5 + return (r << 11) | (g << 5) | b; + } + + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } + + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); + intensity = getBits<0, 4>(intensity); + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; + } + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; + + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; + + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; + + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) { + u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/objc_helper.mm b/src/core/renderer_mtl/objc_helper.mm new file mode 100644 index 000000000..eeea56a0a --- /dev/null +++ b/src/core/renderer_mtl/objc_helper.mm @@ -0,0 +1,12 @@ +#include "renderer_mtl/objc_helper.hpp" + +// TODO: change the include +#import + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size) { + return dispatch_data_create(data, size, dispatch_get_global_queue(0, 0), ^{}); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp new file mode 100644 index 000000000..10bca5ddf --- /dev/null +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -0,0 +1,774 @@ +#include "PICA/gpu.hpp" +#include "renderer_mtl/renderer_mtl.hpp" +#include "renderer_mtl/objc_helper.hpp" + +#include +#include + +#include "SDL_metal.h" + +using namespace PICA; + +CMRC_DECLARE(RendererMTL); + +const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; + +// HACK: redefinition... +PICA::ColorFmt ToColorFormat(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + +MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { + //MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + NS::Error* error = nullptr; + MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); + //MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + if (error) { + Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + return library; +} + +RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} +RendererMTL::~RendererMTL() {} + +void RendererMTL::reset() { + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); + colorRenderTargetCache.reset(); +} + +void RendererMTL::display() { + CA::MetalDrawable* drawable = metalLayer->nextDrawable(); + if (!drawable) { + return; + } + + using namespace PICA::ExternalRegs; + + // Top screen + const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; + const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; + auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); + + if (topScreen) { + clearColor(nullptr, topScreen->get().texture); + } + + // Bottom screen + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; + auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); + + if (bottomScreen) { + clearColor(nullptr, bottomScreen->get().texture); + } + + // -------- Draw -------- + commandBuffer->pushDebugGroup(toNSString("Display")); + + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0); + colorAttachment->setTexture(drawable->texture()); + colorAttachment->setLoadAction(MTL::LoadActionClear); + colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f}); + colorAttachment->setStoreAction(MTL::StoreActionStore); + + nextRenderPassName = "Display"; + beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture()); + renderCommandEncoder->setRenderPipelineState(displayPipeline); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + // Top screen + if (topScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + // Bottom screen + if (bottomScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + endRenderPass(); + + commandBuffer->presentDrawable(drawable); + + commandBuffer->popDebugGroup(); + + commitCommandBuffer(); + + // Inform the vertex buffer cache that the frame ended + vertexBufferCache.endFrame(); + + // Release + drawable->release(); +} + +void RendererMTL::initGraphicsContext(SDL_Window* window) { + // TODO: what should be the type of the view? + void* view = SDL_Metal_CreateView(window); + metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view); + device = MTL::CreateSystemDefaultDevice(); + metalLayer->setDevice(device); + commandQueue = device->newCommandQueue(); + + // -------- Objects -------- + + // Textures + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); + textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); + textureDescriptor->setHeight(Lights::LUT_Count + 1); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + + lutTexture = device->newTexture(textureDescriptor); + lutTexture->setLabel(toNSString("LUT texture")); + textureDescriptor->release(); + + // Samplers + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setLabel(toNSString("Sampler (nearest)")); + nearestSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setLabel(toNSString("Sampler (linear)")); + linearSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->release(); + + // -------- Pipelines -------- + + // Load shaders + auto mtlResources = cmrc::RendererMTL::get_filesystem(); + library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); + MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + + // Display + MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); + MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding)); + + MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction); + displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction); + auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0); + displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm); + + NS::Error* error = nullptr; + displayPipelineDescriptor->setLabel(toNSString("Display pipeline")); + displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + displayPipelineDescriptor->release(); + vertexDisplayFunction->release(); + fragmentDisplayFunction->release(); + + // Blit + MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + + blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); + + // Draw + MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding)); + + // -------- Vertex descriptor -------- + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + + // Position + MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0); + positionAttribute->setFormat(MTL::VertexFormatFloat4); + positionAttribute->setOffset(offsetof(Vertex, s.positions)); + positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Quaternion + MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1); + quaternionAttribute->setFormat(MTL::VertexFormatFloat4); + quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion)); + quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Color + MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2); + colorAttribute->setFormat(MTL::VertexFormatFloat4); + colorAttribute->setOffset(offsetof(Vertex, s.colour)); + colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 + MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3); + texCoord0Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0)); + texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 1 + MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4); + texCoord1Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1)); + texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 W + MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5); + texCoord0WAttribute->setFormat(MTL::VertexFormatFloat); + texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w)); + texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // View + MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6); + viewAttribute->setFormat(MTL::VertexFormatFloat3); + viewAttribute->setOffset(offsetof(Vertex, s.view)); + viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 2 + MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7); + texCoord2Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2)); + texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX); + vertexBufferLayout->setStride(sizeof(Vertex)); + vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex); + vertexBufferLayout->setStepRate(1); + + drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); + + // Copy to LUT texture + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + + MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); + // Disable rasterization + copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false); + + error = nullptr; + copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline")); + copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + copyToLutTexturePipelineDescriptor->release(); + vertexCopyToLutTextureFunction->release(); + + // Depth stencil cache + depthStencilCache.set(device); + + // Vertex buffer cache + vertexBufferCache.set(device); + + // -------- Depth stencil state -------- + MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthStencilDescriptor->setLabel(toNSString("Default depth stencil state")); + defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); + depthStencilDescriptor->release(); + + // Release + copyToLutTextureLibrary->release(); +} + +void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const auto color = colorRenderTargetCache.findFromAddress(startAddress); + if (color) { + const float r = Helpers::getBits<24, 8>(value) / 255.0f; + const float g = Helpers::getBits<16, 8>(value) / 255.0f; + const float b = Helpers::getBits<8, 8>(value) / 255.0f; + const float a = (value & 0xff) / 255.0f; + + colorClearOps[color->get().texture] = {r, g, b, a}; + + return; + } + + const auto depth = depthStencilRenderTargetCache.findFromAddress(startAddress); + if (depth) { + float depthVal; + const auto format = depth->get().format; + if (format == DepthFmt::Depth16) { + depthVal = (value & 0xffff) / 65535.0f; + } else { + depthVal = (value & 0xffffff) / 16777215.0f; + } + + depthClearOps[depth->get().texture] = depthVal; + + if (format == DepthFmt::Depth24Stencil8) { + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; + } + + return; + } + + Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n"); +} + +void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { + const u32 inputWidth = inputSize & 0xffff; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); + + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight); + nextRenderPassName = "Clear before display transfer"; + clearColor(nullptr, srcFramebuffer->texture); + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); + + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + + // Apply scaling for the destination rectangle. + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + Helpers::warn("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. + const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + if (inputGap != 0 || outputGap != 0) { + // Helpers::warn("Strided texture copy\n"); + } + + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). + // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + u32 copyHeight; + if (inputWidth != 0) [[likely]] { + copyHeight = (copySize / inputWidth) * 8; + } else { + copyHeight = 0; + } + + // Find the source surface. + auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + Helpers::warn("RendererGL::TextureCopy failed to locate src framebuffer!\n"); + return; + } + nextRenderPassName = "Clear before texture copy"; + clearColor(nullptr, srcFramebuffer->texture); + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. + auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::drawVertices(PICA::PrimType primType, std::span vertices) { + // Color + auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + + // Depth stencil + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite]; + const bool depthEnable = depthControl & 0x1; + const bool depthWriteEnable = Helpers::getBit<12>(depthControl); + const u8 depthFunc = Helpers::getBits<4, 3>(depthControl); + const u8 colorMask = Helpers::getBits<8, 4>(depthControl); + + Metal::DepthStencilHash depthStencilHash{false, 1}; + depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest]; + depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp]; + const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig); + + std::optional depthStencilRenderTarget = std::nullopt; + if (depthEnable) { + depthStencilHash.depthStencilWrite = depthWriteEnable && depthStencilWrite; + depthStencilHash.depthFunc = depthFunc; + depthStencilRenderTarget = getDepthRenderTarget(); + } else { + if (depthWriteEnable) { + depthStencilHash.depthStencilWrite = true; + depthStencilRenderTarget = getDepthRenderTarget(); + } else if (stencilEnable) { + depthStencilRenderTarget = getDepthRenderTarget(); + } + } + + // Depth uniforms + struct { + float depthScale; + float depthOffset; + bool depthMapEnable; + } depthUniforms; + depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + // -------- Pipeline -------- + Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1}; + if (depthStencilRenderTarget) { + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; + + // Blending and logic op + pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; + pipelineHash.colorWriteMask = colorMask; + + u8 logicOp = 3; // Copy, which doesn't do anything + if (pipelineHash.blendEnabled) { + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + } else { + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + } + + MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); + + // Depth stencil state + MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash); + + // -------- Render -------- + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) + doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) + doesClear = true; + } + } + + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)); + + // Update the LUT texture if necessary + if (gpu.lightingLUTDirty) { + updateLightingLUT(renderCommandEncoder); + } + if (gpu.fogLUTDirty) { + updateFogLUT(renderCommandEncoder); + } + + renderCommandEncoder->setRenderPipelineState(pipeline); + renderCommandEncoder->setDepthStencilState(depthStencilState); + // If size is < 4KB, use inline vertex data, otherwise use a buffer + if (vertices.size_bytes() < 4 * 1024) { + renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); + } else { + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); + } + + // Viewport + const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff; + const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff; + const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f; + const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f; + const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]); + MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0}; + renderCommandEncoder->setViewport(viewport); + + // Blend color + if (pipelineHash.blendEnabled) { + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); + + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + } + + // Stencil reference + if (stencilEnable) { + const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } + + // Bind resources + setupTextureEnvState(renderCommandEncoder); + bindTexturesToSlots(renderCommandEncoder); + renderCommandEncoder->setVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); + renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + + renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); +} + +void RendererMTL::screenshot(const std::string& name) { + // TODO: implement + Helpers::warn("RendererMTL::screenshot not implemented"); +} + +void RendererMTL::deinitGraphicsContext() { + reset(); + + // Release + copyToLutTexturePipeline->release(); + displayPipeline->release(); + defaultDepthStencilState->release(); + lutTexture->release(); + linearSampler->release(); + nearestSampler->release(); + library->release(); + commandQueue->release(); + device->release(); +} + +std::optional RendererMTL::getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound +) { + // Try to find an already existing buffer that contains the provided address + // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to + // subrect of a surface and in case of texcopy we don't know the format of the surface. + auto buffer = colorRenderTargetCache.findFromAddress(addr); + if (buffer.has_value()) { + return buffer.value().get(); + } + + if (!createIfnotFound) { + return std::nullopt; + } + + // Otherwise create and cache a new buffer. + Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); + + return colorRenderTargetCache.add(sampleBuffer); +} + +Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { + Metal::DepthStencilRenderTarget sampleBuffer(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); + auto buffer = depthStencilRenderTargetCache.find(sampleBuffer); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + return depthStencilRenderTargetCache.add(sampleBuffer); + } +} + +Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) { + auto buffer = textureCache.find(tex); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + Metal::Texture& newTex = textureCache.add(tex); + newTex.decodeTexture(textureData); + + return newTex; + } +} + +void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + struct { + u32 textureEnvSourceRegs[6]; + u32 textureEnvOperandRegs[6]; + u32 textureEnvCombinerRegs[6]; + u32 textureEnvScaleRegs[6]; + } envState; + u32 textureEnvColourRegs[6]; + + for (int i = 0; i < 6; i++) { + const u32 ioBase = ioBases[i]; + + envState.textureEnvSourceRegs[i] = regs[ioBase]; + envState.textureEnvOperandRegs[i] = regs[ioBase + 1]; + envState.textureEnvCombinerRegs[i] = regs[ioBase + 2]; + textureEnvColourRegs[i] = regs[ioBase + 3]; + envState.textureEnvScaleRegs[i] = regs[ioBase + 4]; + } + + encoder->setVertexBytes(&textureEnvColourRegs, sizeof(textureEnvColourRegs), 1); + encoder->setFragmentBytes(&envState, sizeof(envState), 1); +} + +void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::Tex0BorderColor, + PICA::InternalRegs::Tex1BorderColor, + PICA::InternalRegs::Tex2BorderColor, + }; + + for (int i = 0; i < 3; i++) { + if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { + continue; + } + + const size_t ioBase = ioBases[i]; + + const u32 dim = regs[ioBase + 1]; + const u32 config = regs[ioBase + 2]; + const u32 height = dim & 0x7ff; + const u32 width = Helpers::getBits<16, 11>(dim); + const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3; + u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF; + + if (addr != 0) [[likely]] { + Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); + auto tex = getTexture(targetTex); + encoder->setFragmentTexture(tex.texture, i); + encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + } else { + // TODO: bind a dummy texture? + } + } + + // LUT texture + encoder->setFragmentTexture(lutTexture, 3); + encoder->setFragmentSamplerState(linearSampler, 3); +} + +void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { + gpu.lightingLUTDirty = false; + std::array lightingLut = {0.0f}; + + for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; + } + + //for (int i = 0; i < Lights::LUT_Count; i++) { + // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); + //} + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + u32 arrayOffset = 0; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); +} + +void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { + gpu.fogLUTDirty = false; + std::array fogLut = {0.0f}; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); + u32 arrayOffset = (u32)Lights::LUT_Count; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); +} + +void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { + nextRenderPassName = "Texture copy"; + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture + bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); + beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture); + + // Pipeline + Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; + auto blitPipeline = blitPipelineCache.get(hash); + + renderCommandEncoder->setRenderPipelineState(blitPipeline); + + // Viewport + renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; + + // Bind resources + renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal new file mode 100644 index 000000000..40a7f50d9 --- /dev/null +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -0,0 +1,9 @@ +#include +using namespace metal; + +constant ushort lutTextureWidth [[function_constant(0)]]; + +// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal new file mode 100644 index 000000000..95f417c70 --- /dev/null +++ b/src/host_shaders/metal_shaders.metal @@ -0,0 +1,782 @@ +#include +using namespace metal; + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +constant float4 displayPositions[4] = { + float4(-1.0, -1.0, 0.0, 1.0), + float4( 1.0, -1.0, 0.0, 1.0), + float4(-1.0, 1.0, 0.0, 1.0), + float4( 1.0, 1.0, 0.0, 1.0) +}; + +constant float2 displayTexCoord[4] = { + float2(0.0, 1.0), + float2(0.0, 0.0), + float2(1.0, 1.0), + float2(1.0, 0.0) +}; + +vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) { + BasicVertexOut out; + out.position = displayPositions[vid]; + out.uv = displayTexCoord[vid]; + + return out; +} + +fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { + return tex.sample(samplr, in.uv); +} + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; + out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { + return tex.sample(samplr, in.uv); +} + +struct PicaRegs { + uint regs[0x200 - 0x48]; + + uint read(uint reg) constant { + return regs[reg - 0x48]; + } +}; + +struct VertTEV { + uint textureEnvColor[6]; +}; + +float4 abgr8888ToFloat4(uint abgr) { + const float scale = 1.0 / 255.0; + + return scale * float4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); +} + +struct DrawVertexIn { + float4 position [[attribute(0)]]; + float4 quaternion [[attribute(1)]]; + float4 color [[attribute(2)]]; + float2 texCoord0 [[attribute(3)]]; + float2 texCoord1 [[attribute(4)]]; + float texCoord0W [[attribute(5)]]; + float3 view [[attribute(6)]]; + float2 texCoord2 [[attribute(7)]]; +}; + +// Metal cannot return arrays from vertex functions, this is an ugly workaround +struct EnvColor { + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; + float4 c5; + + thread float4& operator[](int i) { + switch (i) { + case 0: return c0; + case 1: return c1; + case 2: return c2; + case 3: return c3; + case 4: return c4; + case 5: return c5; + default: return c0; + } + } +}; + +float3 rotateFloat3ByQuaternion(float3 v, float4 q) { + float3 u = q.xyz; + float s = q.w; + + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + +// Convert an arbitrary-width floating point literal to an f32 +float decodeFP(uint hex, uint E, uint M) { + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0u) { + if (exponent == (1u << E) - 1u) + exponent = 255u; + else + exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return as_type(hex); +} + +struct DepthUniforms { + float depthScale; + float depthOffset; + bool depthMapEnable; +}; + +struct DrawVertexOut { + float4 position [[position]]; + float4 quaternion; + float4 color; + float3 texCoord0; + float2 texCoord1; + float2 texCoord2; + float3 view; + float3 normal; + float3 tangent; + float3 bitangent; + EnvColor textureEnvColor [[flat]]; + float4 textureEnvBufferColor [[flat]]; +}; + +struct DrawVertexOutWithClip { + DrawVertexOut out; + float clipDistance [[clip_distance]] [2]; +}; + +// TODO: check this +float transformZ(float z, float w, constant DepthUniforms& depthUniforms) { + z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset; + if (!depthUniforms.depthMapEnable) { + z *= w; + } + + return z * w; +} + +vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) { + DrawVertexOut out; + + // Position + out.position = in.position; + // Flip the y position + out.position.y = -out.position.y; + + // Apply depth uniforms + out.position.z = transformZ(out.position.z, out.position.w, depthUniforms); + + // Color + out.color = min(abs(in.color), 1.0); + + // Texture coordinates + out.texCoord0 = float3(in.texCoord0, in.texCoord0W); + out.texCoord0.y = 1.0 - out.texCoord0.y; + out.texCoord1 = in.texCoord1; + out.texCoord1.y = 1.0 - out.texCoord1.y; + out.texCoord2 = in.texCoord2; + out.texCoord2.y = 1.0 - out.texCoord2.y; + + // View + out.view = in.view; + + // TBN + out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion)); + out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion)); + out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion)); + out.quaternion = in.quaternion; + + // Environment + for (int i = 0; i < 6; i++) { + out.textureEnvColor[i] = abgr8888ToFloat4(tev.textureEnvColor[i]); + } + + out.textureEnvBufferColor = abgr8888ToFloat4(picaRegs.read(0xFDu)); + + DrawVertexOutWithClip outWithClip; + outWithClip.out = out; + + // Parse clipping plane registers + float4 clipData = float4( + decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u), + decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u) + ); + + // There's also another, always-on clipping plane based on vertex z + // TODO: transform + outWithClip.clipDistance[0] = -in.position.z; + outWithClip.clipDistance[1] = dot(clipData, in.position); + + return outWithClip; +} + +constant bool lightingEnabled [[function_constant(0)]]; +constant uint8_t lightingNumLights [[function_constant(1)]]; +constant uint32_t lightingConfig1 [[function_constant(2)]]; +constant uint16_t alphaControl [[function_constant(3)]]; + +struct Globals { + bool error_unimpl; + + float4 tevSources[16]; + float4 tevNextPreviousBuffer; + bool tevUnimplementedSourceFlag = false; + + uint GPUREG_LIGHTING_LUTINPUT_SCALE; + uint GPUREG_LIGHTING_LUTINPUT_ABS; + uint GPUREG_LIGHTING_LUTINPUT_SELECT; + uint GPUREG_LIGHTi_CONFIG; + + // HACK + //bool lightingEnabled; + //uint8_t lightingNumLights; + //uint32_t lightingConfig1; + //uint16_t alphaControl; + + float3 normal; +}; + +// See docs/lighting.md +constant uint samplerEnabledBitfields[2] = {0x7170e645u, 0x7f013fefu}; + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} + +struct FragTEV { + uint textureEnvSource[6]; + uint textureEnvOperand[6]; + uint textureEnvCombiner[6]; + uint textureEnvScale[6]; + + float4 fetchSource(thread Globals& globals, uint src_id) constant { + if (src_id >= 6u && src_id < 13u) { + globals.tevUnimplementedSourceFlag = true; + } + + return globals.tevSources[src_id]; + } + + float4 getColorAndAlphaSource(thread Globals& globals, int tev_id, int src_id) constant { + float4 result; + + float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u); + float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u); + + uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u; + uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u; + + // TODO: figure out what the undocumented values do + switch (colorOperand) { + case 0u: result.rgb = colorSource.rgb; break; // Source color + case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color + case 2u: result.rgb = float3(colorSource.a); break; // Source alpha + case 3u: result.rgb = float3(1.0 - colorSource.a); break; // One minus source alpha + case 4u: result.rgb = float3(colorSource.r); break; // Source red + case 5u: result.rgb = float3(1.0 - colorSource.r); break; // One minus source red + case 8u: result.rgb = float3(colorSource.g); break; // Source green + case 9u: result.rgb = float3(1.0 - colorSource.g); break; // One minus source green + case 12u: result.rgb = float3(colorSource.b); break; // Source blue + case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue + default: break; + } + + // TODO: figure out what the undocumented values do + switch (alphaOperand) { + case 0u: result.a = alphaSource.a; break; // Source alpha + case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha + case 2u: result.a = alphaSource.r; break; // Source red + case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red + case 4u: result.a = alphaSource.g; break; // Source green + case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green + case 6u: result.a = alphaSource.b; break; // Source blue + case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue + default: break; + } + + return result; + } + + float4 calculateCombiner(thread Globals& globals, int tev_id) constant { + float4 source0 = getColorAndAlphaSource(globals, tev_id, 0); + float4 source1 = getColorAndAlphaSource(globals, tev_id, 1); + float4 source2 = getColorAndAlphaSource(globals, tev_id, 2); + + uint colorCombine = textureEnvCombiner[tev_id] & 15u; + uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u; + + float4 result = float4(1.0); + + // TODO: figure out what the undocumented values do + switch (colorCombine) { + case 0u: result.rgb = source0.rgb; break; // Replace + case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate + case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break; // Add + case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break; // Interpolate + case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract + case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB + case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA + case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add + case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break; // Add then multiply + default: break; + } + + if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. + // TODO: figure out what the undocumented values do + // TODO: test if the alpha combiner supports all the same modes as the color combiner. + switch (alphaCombine) { + case 0u: result.a = source0.a; break; // Replace + case 1u: result.a = source0.a * source1.a; break; // Modulate + case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add + case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate + case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract + case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break; // Multiply then add + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply + default: break; + } + } + + result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u)); + result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u)); + + return result; + } +}; + +enum class LogicOp : uint8_t { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15 +}; + +uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { + switch (logicOp) { + case LogicOp::Clear: return as_type(float4(0.0)); + case LogicOp::And: return s & d; + case LogicOp::AndReverse: return s & ~d; + case LogicOp::Copy: return s; + case LogicOp::Set: return as_type(float4(1.0)); + case LogicOp::CopyInverted: return ~s; + case LogicOp::NoOp: return d; + case LogicOp::Invert: return ~d; + case LogicOp::Nand: return ~(s & d); + case LogicOp::Or: return s | d; + case LogicOp::Nor: return ~(s | d); + case LogicOp::Xor: return s ^ d; + case LogicOp::Equiv: return ~(s ^ d); + case LogicOp::AndInverted: return ~s & d; + case LogicOp::OrReverse: return s | ~d; + case LogicOp::OrInverted: return ~s | d; + } +} + +#define D0_LUT 0u +#define D1_LUT 1u +#define SP_LUT 2u +#define FR_LUT 3u +#define RB_LUT 4u +#define RG_LUT 5u +#define RR_LUT 6u + +#define FOG_INDEX 24 + +float lutLookup(texture2d texLut, uint lut, uint index) { + return texLut.read(uint2(index, lut)).r; +} + +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + globals.error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(globals.normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(in.view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(globals.normal, normalize(in.view)); + break; + } + case 3u: { + delta = dot(light_vector, globals.normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u))); + + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + float3 spotlight_vector = float3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + globals.error_unimpl = true; + break; + } + default: { + delta = 1.0; + globals.error_unimpl = true; + break; + } + } + + // 0 = enabled + if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(texLut, lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(texLut, lut_index, index) * scale; + } +} + +float3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8))); +} + +// Implements the following algorthm: https://mathb.in/26766 +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + //float3 normal = normalize(in.normal); + //float3 tangent = normalize(in.tangent); + //float3 bitangent = normalize(in.bitangent); + //float3 view = normalize(in.view); + + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); + + primaryColor = float4(0.0, 0.0, 0.0, 1.0); + secondaryColor = float4(0.0, 0.0, 0.0, 1.0); + + uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u); + globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u); + globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u); + globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u); + + uint bumpMode = extract_bits(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + switch (bumpMode) { + default: { + globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion); + break; + } + } + + float4 diffuseSum = float4(0.0, 0.0, 0.0, 1.0); + float4 specularSum = float4(0.0, 0.0, 0.0, 1.0); + + uint environmentId = extract_bits(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clampHighlights = extract_bits(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint lightId; + float3 lightVector = float3(0.0); + float3 halfVector = float3(0.0); + + for (uint i = 0u; i < lightingNumLights + 1; i++) { + lightId = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); + + uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + (lightId << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = picaRegs.read(0x0141u + (lightId << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = picaRegs.read(0x0142u + (lightId << 4u)); + uint GPUREG_LIGHTi_AMBIENT = picaRegs.read(0x0143u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = picaRegs.read(0x0144u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = picaRegs.read(0x0145u + (lightId << 4u)); + globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); + + float lightDistance; + float3 lightPosition = normalize(float3( + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + )); + + // Positional Light + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + // error_unimpl = true; + lightVector = lightPosition + in.view; + } + + // Directional light + else { + lightVector = lightPosition; + } + + lightDistance = length(lightVector); + lightVector = normalize(lightVector); + halfVector = lightVector + normalize(in.view); + + float NdotL = dot(globals.normal, lightVector); // N dot Li + + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) + NdotL = max(0.0, NdotL); + else + NdotL = abs(NdotL); + + float geometricFactor; + bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (useGeo0 || useGeo1) { + geometricFactor = dot(halfVector, halfVector); + geometricFactor = geometricFactor == 0.0 ? 0.0 : min(NdotL / geometricFactor, 1.0); + } + + float distanceAttenuation = 1.0; + if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = extract_bits(picaRegs.read(0x014Au + (lightId << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = extract_bits(picaRegs.read(0x014Bu + (lightId << 4u)), 0, 20); + + float distanceAttenuationBias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distanceAttenuationScale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distanceAttenuation = lutLookup(texLut, 16u + lightId, index); + } + + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float3 reflectedColor; + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + + if (isSamplerEnabled(environmentId, RG_LUT)) { + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.g = reflectedColor.r; + } + + if (isSamplerEnabled(environmentId, RB_LUT)) { + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.b = reflectedColor.r; + } + + float3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0Distribution; + float3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1Distribution * reflectedColor; + + specular0 *= useGeo0 ? geometricFactor : 1.0; + specular1 *= useGeo1 ? geometricFactor : 1.0; + + float clampFactor = 1.0; + if (clampHighlights && NdotL == 0.0) { + clampFactor = 0.0; + } + + float lightFactor = distanceAttenuation * spotlightAttenuation; + diffuseSum.rgb += lightFactor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specularSum.rgb += lightFactor * clampFactor * (specular0 + specular1); + } + uint fresnelOutput1 = extract_bits(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnelOutput2 = extract_bits(GPUREG_LIGHTING_CONFIG0, 3, 1); + + float fresnelFactor; + + if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + } + + if (fresnelOutput1 == 1u) { + diffuseSum.a = fresnelFactor; + } + + if (fresnelOutput2 == 1u) { + specularSum.a = fresnelFactor; + } + + uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); + float4 globalAmbient = float4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primaryColor = clamp(globalAmbient + diffuseSum, 0.0, 1.0); + secondaryColor = clamp(specularSum, 0.0, 1.0); +} + +float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { + return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); +} + +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], + texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], + sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { + Globals globals; + + // HACK + //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u; + //globals.lightingNumLights = picaRegs.read(0x01C2u); + //globals.lightingConfig1 = picaRegs.read(0x01C4u); + //globals.alphaControl = picaRegs.read(0x104); + + globals.tevSources[0] = in.color; + if (lightingEnabled) { + calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + } else { + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); + } + + uint textureConfig = picaRegs.read(0x80u); + float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2; + + if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy); + if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1); + if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2); + globals.tevSources[13] = float4(0.0); // Previous buffer + globals.tevSources[15] = in.color; // Previous combiner + + globals.tevNextPreviousBuffer = in.textureEnvBufferColor; + uint textureEnvUpdateBuffer = picaRegs.read(0xE0u); + + for (int i = 0; i < 6; i++) { + globals.tevSources[14] = in.textureEnvColor[i]; // Constant color + globals.tevSources[15] = tev.calculateCombiner(globals, i); + globals.tevSources[13] = globals.tevNextPreviousBuffer; + + if (i < 4) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb; + } + + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + globals.tevNextPreviousBuffer.a = globals.tevSources[15].a; + } + } + } + + float4 color = globals.tevSources[15]; + + // Fog + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 1.0 - in.position.z : in.position.z; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + float3 fog_color = float3(r, g, b); + + color.rgb = mix(fog_color, color.rgb, fog_factor); + } + + // Perform alpha test + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; + float alpha = color.a; + + switch (func) { + case 0u: discard_fragment(); // Never pass alpha test + case 1u: break; // Always pass alpha test + case 2u: // Pass if equal + if (alpha != reference) discard_fragment(); + break; + case 3u: // Pass if not equal + if (alpha == reference) discard_fragment(); + break; + case 4u: // Pass if less than + if (alpha >= reference) discard_fragment(); + break; + case 5u: // Pass if less than or equal + if (alpha > reference) discard_fragment(); + break; + case 6u: // Pass if greater than + if (alpha <= reference) discard_fragment(); + break; + case 7u: // Pass if greater than or equal + if (alpha < reference) discard_fragment(); + break; + } + } + + return performLogicOp(logicOp, color, prevColor); +} From 58e1a536996348d1ec4c8ab5a65b396fe28ccb3b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 11:06:23 +0200 Subject: [PATCH 03/14] metal: create renderer --- include/panda_qt/main_window.hpp | 1 + include/renderer.hpp | 3 ++- include/renderer_gl/surface_cache.hpp | 4 ++-- src/core/PICA/gpu.cpp | 11 ++++++++++- src/panda_qt/main_window.cpp | 5 ++++- src/panda_sdl/frontend_sdl.cpp | 14 ++++++++++++-- src/renderer.cpp | 4 +++- 7 files changed, 34 insertions(+), 8 deletions(-) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 3ff16a1d0..fc756b9fd 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -129,6 +129,7 @@ class MainWindow : public QMainWindow { // Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer bool usingGL = false; bool usingVk = false; + bool usingMtl = false; // Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard // This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state diff --git a/include/renderer.hpp b/include/renderer.hpp index 569a730b7..4eacf0b14 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -17,7 +17,8 @@ enum class RendererType : s8 { Null = 0, OpenGL = 1, Vulkan = 2, - Software = 3, + Metal = 3, + Software = 4, }; struct EmulatorConfig; diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 5323741fc..7346fd11a 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,8 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - static_assert(std::is_same() || std::is_same() || - std::is_same(), "Invalid surface type"); + //static_assert(std::is_same() || std::is_same() || + // std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index fe336edc8..95001b332 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -15,6 +15,9 @@ #ifdef PANDA3DS_ENABLE_VULKAN #include "renderer_vk/renderer_vk.hpp" #endif +#ifdef PANDA3DS_ENABLE_METAL +#include "renderer_mtl/renderer_mtl.hpp" +#endif constexpr u32 topScreenWidth = 240; constexpr u32 topScreenHeight = 400; @@ -52,6 +55,12 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { renderer.reset(new RendererVK(*this, regs, externalRegs)); break; } +#endif +#ifdef PANDA3DS_ENABLE_METAL + case RendererType::Metal: { + renderer.reset(new RendererMTL(*this, regs, externalRegs)); + break; + } #endif default: { Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType)); @@ -365,7 +374,7 @@ PICA::Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - + // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index f1949da77..4c187bc2c 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -103,6 +103,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) const RendererType rendererType = emu->getConfig().rendererType; usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); usingVk = (rendererType == RendererType::Vulkan); + usingMtl = (rendererType == RendererType::Metal); if (usingGL) { // Make GL context current for this thread, enable VSync @@ -113,6 +114,8 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) emu->initGraphicsContext(glContext); } else if (usingVk) { Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); + } else if (usingMtl) { + Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!"); } else { Helpers::panic("Unsupported graphics backend for Qt frontend!"); } @@ -628,4 +631,4 @@ void MainWindow::setupControllerSensors(SDL_GameController* controller) { if (haveGyro) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } -} \ No newline at end of file +} diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 8f9f4240d..057a4858f 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -67,6 +67,16 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp } #endif +#ifdef PANDA3DS_ENABLE_METAL + if (config.rendererType == RendererType::Metal) { + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + + if (window == nullptr) { + Helpers::warn("Window creation failed: %s", SDL_GetError()); + } + } +#endif + emu.initGraphicsContext(window); } @@ -286,7 +296,7 @@ void FrontendSDL::run() { } break; } - + case SDL_CONTROLLERSENSORUPDATE: { if (event.csensor.sensor == SDL_SENSOR_GYRO) { auto rotation = Gyro::SDL::convertRotation({ @@ -370,4 +380,4 @@ void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { if (haveGyro) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } -} \ No newline at end of file +} diff --git a/src/renderer.cpp b/src/renderer.cpp index 76c3e7a02..6a18df85e 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -17,6 +17,7 @@ std::optional Renderer::typeFromString(std::string inString) { {"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null}, {"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL}, {"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan}, + {"mtl", RendererType::Metal}, {"metal", RendererType::Metal}, {"sw", RendererType::Software}, {"soft", RendererType::Software}, {"software", RendererType::Software}, {"softrast", RendererType::Software}, }; @@ -33,7 +34,8 @@ const char* Renderer::typeToString(RendererType rendererType) { case RendererType::Null: return "null"; case RendererType::OpenGL: return "opengl"; case RendererType::Vulkan: return "vulkan"; + case RendererType::Metal: return "metal"; case RendererType::Software: return "software"; default: return "Invalid"; } -} \ No newline at end of file +} From 45eda2f12048204315ce771ba84b95754bf6fdc6 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 12:25:46 +0200 Subject: [PATCH 04/14] bring back cmake changes --- CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31fdd9f27..4fd12174b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,11 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) + # Disable stack buffer overflow checks in user builds + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") +endif() + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -256,6 +261,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp + include/sdl_gyro.hpp ) cmrc_add_resource_library( @@ -570,7 +576,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "") + set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") endif() target_link_libraries(Alber PRIVATE AlberCore) From 8830747e90a119d50fb5c7063af1f9e08bfe4cdd Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 29 Aug 2024 19:43:36 +0200 Subject: [PATCH 05/14] clear render targets after creation --- .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 2 +- src/core/renderer_mtl/renderer_mtl.cpp | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index 1760cdfa5..8aa299da1 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -12,7 +12,7 @@ struct BufferHandle { }; // 64MB buffer for caching vertex data -#define CACHE_BUFFER_SIZE 64 * 1024 * 1024 +#define CACHE_BUFFER_SIZE 128 * 1024 * 1024 class VertexBufferCache { public: diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 10bca5ddf..bdb5390d6 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -413,7 +413,7 @@ void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 // Find the source surface. auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); if (!srcFramebuffer) { - Helpers::warn("RendererGL::TextureCopy failed to locate src framebuffer!\n"); + Helpers::warn("RendererMTL::TextureCopy failed to locate src framebuffer!\n"); return; } nextRenderPassName = "Clear before texture copy"; @@ -605,7 +605,12 @@ std::optional RendererMTL::getColorRenderTarget( // Otherwise create and cache a new buffer. Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); - return colorRenderTargetCache.add(sampleBuffer); + auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); + + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + + return colorBuffer; } Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { @@ -615,7 +620,15 @@ Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { if (buffer.has_value()) { return buffer.value().get(); } else { - return depthStencilRenderTargetCache.add(sampleBuffer); + auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); + + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } + + return depthBuffer; } } From 90420160f2092b2c2cc4bf14b261480bc8849875 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 24 Sep 2024 14:51:27 +0200 Subject: [PATCH 06/14] correct comment --- include/renderer_mtl/mtl_vertex_buffer_cache.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index 8aa299da1..cc552477a 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -11,7 +11,7 @@ struct BufferHandle { size_t offset; }; -// 64MB buffer for caching vertex data +// 128MB buffer for caching vertex data #define CACHE_BUFFER_SIZE 128 * 1024 * 1024 class VertexBufferCache { From 158be432fcf36ca4165bca1868850b3339a7dace Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 31 Oct 2024 09:25:01 +0100 Subject: [PATCH 07/14] rework the lut system --- CMakeLists.txt | 4 +- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 5 +- include/renderer_mtl/mtl_lut_texture.hpp | 25 ++ include/renderer_mtl/mtl_texture.hpp | 1 + include/renderer_mtl/renderer_mtl.hpp | 29 +- src/core/renderer_mtl/mtl_lut_texture.cpp | 32 +++ src/core/renderer_mtl/renderer_mtl.cpp | 271 +++++++++++------- .../metal_copy_to_lut_texture.metal | 2 +- src/host_shaders/metal_shaders.metal | 58 ++-- 9 files changed, 274 insertions(+), 153 deletions(-) create mode 100644 include/renderer_mtl/mtl_lut_texture.hpp create mode 100644 src/core/renderer_mtl/mtl_lut_texture.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fac11cbc8..854b9b9e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -482,6 +482,7 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_render_target.hpp include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/mtl_lut_texture.hpp include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp ) @@ -490,6 +491,7 @@ if(ENABLE_METAL AND APPLE) src/core/renderer_mtl/renderer_mtl.cpp src/core/renderer_mtl/mtl_texture.cpp src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/mtl_lut_texture.cpp src/core/renderer_mtl/objc_helper.mm src/host_shaders/metal_shaders.metal src/host_shaders/metal_copy_to_lut_texture.metal @@ -587,7 +589,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) if(NOT ENABLE_OPENGL) message(FATAL_ERROR "Qt frontend requires OpenGL") endif() - + option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) set(QT_LANGUAGES docs/translations) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 8bfea6366..c5105a13c 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -135,7 +135,10 @@ class DrawPipelineCache { colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); } - desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) + desc->setStencilAttachmentPixelFormat(depthFormat); NS::Error* error = nullptr; desc->setLabel(toNSString("Draw pipeline")); diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp new file mode 100644 index 000000000..162bfe25b --- /dev/null +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace Metal { + +class LutTexture { +public: + LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); + ~LutTexture(); + + u32 getNextIndex(); + + // Getters + MTL::Texture* getTexture() { return texture; } + + u32 getCurrentIndex() { return currentIndex; } + +private: + MTL::Texture* texture; + + u32 currentIndex = 0; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 590132bd1..9cec268dc 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -1,4 +1,5 @@ #pragma once + #include #include #include diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 9ba0937a3..e28b63b46 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -1,3 +1,5 @@ +#pragma once + #include #include @@ -8,6 +10,8 @@ #include "mtl_draw_pipeline_cache.hpp" #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" +#include "mtl_lut_texture.hpp" + // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -54,12 +58,15 @@ class RendererMTL final : public Renderer { Metal::DepthStencilCache depthStencilCache; Metal::VertexBufferCache vertexBufferCache; - // Objects + // Resources MTL::SamplerState* nearestSampler; MTL::SamplerState* linearSampler; - MTL::Texture* lutTexture; + MTL::Texture* nullTexture; MTL::DepthStencilState* defaultDepthStencilState; + Metal::LutTexture* lutLightingTexture; + Metal::LutTexture* lutFogTexture; + // Pipelines MTL::RenderPipelineState* displayPipeline; MTL::RenderPipelineState* copyToLutTexturePipeline; @@ -91,21 +98,7 @@ class RendererMTL final : public Renderer { } } - void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { - createCommandBufferIfNeeded(); - - if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { - endRenderPass(); - - renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); - renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); - - lastColorTexture = colorTexture; - lastDepthTexture = depthTexture; - } - - renderPassDescriptor->release(); - } + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); void commitCommandBuffer() { if (renderCommandEncoder) { @@ -115,6 +108,8 @@ class RendererMTL final : public Renderer { } if (commandBuffer) { commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); commandBuffer->release(); commandBuffer = nullptr; } diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp new file mode 100644 index 000000000..ac4ff6d93 --- /dev/null +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -0,0 +1,32 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +namespace Metal { + +constexpr u32 LAYER_COUNT = 1024; + +LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); + + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); +} + +LutTexture::~LutTexture() { + texture->release(); +} + +u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + + return currentIndex; +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index bdb5390d6..bf2cdab11 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -1,17 +1,21 @@ -#include "PICA/gpu.hpp" #include "renderer_mtl/renderer_mtl.hpp" -#include "renderer_mtl/objc_helper.hpp" #include #include +#include "renderer_mtl/mtl_lut_texture.hpp" + +// HACK +#undef NO +#include "PICA/gpu.hpp" #include "SDL_metal.h" using namespace PICA; CMRC_DECLARE(RendererMTL); -const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; +const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +const u32 FOG_LUT_TEXTURE_WIDTH = 128; // HACK: redefinition... PICA::ColorFmt ToColorFormat(u32 format) { @@ -23,10 +27,10 @@ PICA::ColorFmt ToColorFormat(u32 format) { } MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { - //MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + // MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); NS::Error* error = nullptr; MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); - //MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + // MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); if (error) { Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); } @@ -39,19 +43,19 @@ RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, RendererMTL::~RendererMTL() {} void RendererMTL::reset() { - vertexBufferCache.reset(); - depthStencilCache.reset(); - drawPipelineCache.reset(); - blitPipelineCache.reset(); - textureCache.reset(); - depthStencilRenderTargetCache.reset(); + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); colorRenderTargetCache.reset(); } void RendererMTL::display() { CA::MetalDrawable* drawable = metalLayer->nextDrawable(); if (!drawable) { - return; + return; } using namespace PICA::ExternalRegs; @@ -62,7 +66,7 @@ void RendererMTL::display() { auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); if (topScreen) { - clearColor(nullptr, topScreen->get().texture); + clearColor(nullptr, topScreen->get().texture); } // Bottom screen @@ -71,7 +75,7 @@ void RendererMTL::display() { auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); if (bottomScreen) { - clearColor(nullptr, bottomScreen->get().texture); + clearColor(nullptr, bottomScreen->get().texture); } // -------- Draw -------- @@ -131,14 +135,14 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType2D); - textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); - textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); - textureDescriptor->setHeight(Lights::LUT_Count + 1); - textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + textureDescriptor->setWidth(1); + textureDescriptor->setHeight(1); textureDescriptor->setStorageMode(MTL::StorageModePrivate); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); - lutTexture = device->newTexture(textureDescriptor); - lutTexture->setLabel(toNSString("LUT texture")); + nullTexture = device->newTexture(textureDescriptor); + nullTexture->setLabel(toNSString("Null texture")); textureDescriptor->release(); // Samplers @@ -153,6 +157,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { samplerDescriptor->release(); + lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); + lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); + // -------- Pipelines -------- // Load shaders @@ -249,14 +256,15 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Copy to LUT texture MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); - constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); - error = nullptr; - MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); - if (error) { - Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - constants->release(); + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = + copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); @@ -314,8 +322,8 @@ void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 c depthClearOps[depth->get().texture] = depthVal; if (format == DepthFmt::Depth24Stencil8) { - const u8 stencilVal = value >> 24; - stencilClearOps[depth->get().texture] = stencilVal; + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; } return; @@ -365,7 +373,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, } void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { - // Texture copy size is aligned to 16 byte units + // Texture copy size is aligned to 16 byte units const u32 copySize = totalBytes & ~0xf; if (copySize == 0) { Helpers::warn("TextureCopy total bytes less than 16!\n"); @@ -463,33 +471,33 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spanformat, DepthFmt::Unknown1}; if (depthStencilRenderTarget) { - pipelineHash.depthFmt = depthStencilRenderTarget->format; - } - pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; - pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; - pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; - pipelineHash.fragHash.alphaControl = regs[0x104]; + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; pipelineHash.colorWriteMask = colorMask; - u8 logicOp = 3; // Copy, which doesn't do anything + u8 logicOp = 3; // Copy if (pipelineHash.blendEnabled) { - pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; } else { - logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); } MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); @@ -500,25 +508,25 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spaninit(); bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); - if (depthStencilRenderTarget) { - if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) - doesClear = true; - if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { - if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) - doesClear = true; - } - } - - nextRenderPassName = "Draw vertices"; - beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)); + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + } + } + + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded( + renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr) + ); // Update the LUT texture if necessary if (gpu.lightingLUTDirty) { updateLightingLUT(renderCommandEncoder); } if (gpu.fogLUTDirty) { - updateFogLUT(renderCommandEncoder); - } + updateFogLUT(renderCommandEncoder); + } renderCommandEncoder->setRenderPipelineState(pipeline); renderCommandEncoder->setDepthStencilState(depthStencilState); @@ -526,7 +534,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); } else { - Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); } @@ -541,20 +549,20 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span(constantColor); - const u8 b = Helpers::getBits<16, 8>(constantColor); - const u8 a = Helpers::getBits<24, 8>(constantColor); + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); - renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); } // Stencil reference if (stencilEnable) { - const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value - renderCommandEncoder->setStencilReferenceValue(reference); - } + const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } // Bind resources setupTextureEnvState(renderCommandEncoder); @@ -563,6 +571,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()}; + renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); } @@ -575,11 +585,14 @@ void RendererMTL::screenshot(const std::string& name) { void RendererMTL::deinitGraphicsContext() { reset(); + delete lutLightingTexture; + delete lutFogTexture; + // Release copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); - lutTexture->release(); + nullTexture->release(); linearSampler->release(); nearestSampler->release(); library->release(); @@ -607,10 +620,10 @@ std::optional RendererMTL::getColorRenderTarget( auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); - // Clear the color buffer - colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; - return colorBuffer; + return colorBuffer; } Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { @@ -622,13 +635,13 @@ Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { } else { auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); - // Clear the depth buffer - depthClearOps[depthBuffer.texture] = 0.0f; - if (depthBuffer.format == DepthFmt::Depth24Stencil8) { - stencilClearOps[depthBuffer.texture] = 0; - } + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } - return depthBuffer; + return depthBuffer; } } @@ -683,7 +696,9 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - continue; + encoder->setFragmentTexture(nullTexture, i); + encoder->setFragmentSamplerState(nearestSampler, i); + continue; } const size_t ioBase = ioBases[i]; @@ -701,42 +716,55 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { encoder->setFragmentTexture(tex.texture, i); encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { - // TODO: bind a dummy texture? + // TODO: log } } - - // LUT texture - encoder->setFragmentTexture(lutTexture, 3); - encoder->setFragmentSamplerState(linearSampler, 3); } void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { gpu.lightingLUTDirty = false; - std::array lightingLut = {0.0f}; - for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { - uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; - lightingLut[i] = (float)(value << 4) / 65535.0f; + std::array lightingLut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (value << 4); } - //for (int i = 0; i < Lights::LUT_Count; i++) { - // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); - //} + u32 index = lutLightingTexture->getNextIndex(); + lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); + + /* + endRenderPass(); + Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + + auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); + blitCommandEncoder->copyFromBuffer(buffer.buffer, buffer.offset, LIGHT_LUT_TEXTURE_WIDTH * 2 * 4, 0, MTL::Size(LIGHT_LUT_TEXTURE_WIDTH, + Lights::LUT_Count, 1), lutLightingTexture, 0, 0, MTL::Origin(0, 0, 0)); + + blitCommandEncoder->endEncoding(); + */ + + /* renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutTexture, 0); + renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); u32 arrayOffset = 0; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize); + + MTL::Resource* barrierResources[] = {lutLightingTexture}; + renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); + */ } void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut = {0.0f}; + + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -749,20 +777,31 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { fogLut[i + 1] = fogDifference; } + u32 index = lutFogTexture->getNextIndex(); + lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); + + /* renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutTexture, 0); - //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); - //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); + // Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + // renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); u32 arrayOffset = (u32)Lights::LUT_Count; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(128)); + + MTL::Resource* barrierResources[] = {lutLightingTexture}; + renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); + */ } -void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { - nextRenderPassName = "Texture copy"; +void RendererMTL::textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect +) { + nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); @@ -775,8 +814,13 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta renderCommandEncoder->setRenderPipelineState(blitPipeline); // Viewport - renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); - float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; + renderCommandEncoder->setViewport(MTL::Viewport{ + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 + }); + float srcRectNDC[4] = { + srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() + }; // Bind resources renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); @@ -785,3 +829,26 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } + +void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + + // Bind persistent resources + + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal index 40a7f50d9..c21246f16 100644 --- a/src/host_shaders/metal_copy_to_lut_texture.metal +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -4,6 +4,6 @@ using namespace metal; constant ushort lutTextureWidth [[function_constant(0)]]; // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass -vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 95f417c70..c2c1799fa 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -406,13 +406,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { #define RG_LUT 5u #define RR_LUT 6u -#define FOG_INDEX 24 - -float lutLookup(texture2d texLut, uint lut, uint index) { - return texLut.read(uint2(index, lut)).r; +float lutLookup(texture2d_array texLut, uint slice, uint lut, uint index) { + return texLut.read(uint2(index, lut), slice).r; } -float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { uint lut_index; int bit_in_config1; if (lut_id == SP_LUT) { @@ -498,12 +496,12 @@ float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant delta = abs(delta); } int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); - return lutLookup(texLut, lut_index, index) * scale; + return lutLookup(texLut, slice, lut_index, index) * scale; } else { // Range is [-1, 1] so we need to map it to [0, 1] int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); if (index < 0) index += 256; - return lutLookup(texLut, lut_index, index) * scale; + return lutLookup(texLut, slice, lut_index, index) * scale; } } @@ -515,7 +513,7 @@ float3 regToColor(uint reg) { } // Implements the following algorthm: https://mathb.in/26766 -void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { // Quaternions describe a transformation from surface-local space to eye space. // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). @@ -566,10 +564,10 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); float lightDistance; - float3 lightPosition = normalize(float3( + float3 lightPosition = float3( decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); + ); // Positional Light if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { @@ -613,23 +611,23 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; delta = clamp(delta, 0.0, 1.0); int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); - distanceAttenuation = lutLookup(texLut, 16u + lightId, index); + distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index); } - float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); - float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); - float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector); float3 reflectedColor; - reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector); if (isSamplerEnabled(environmentId, RG_LUT)) { - reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector); } else { reflectedColor.g = reflectedColor.r; } if (isSamplerEnabled(environmentId, RB_LUT)) { - reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector); + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector); } else { reflectedColor.b = reflectedColor.r; } @@ -655,7 +653,7 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float fresnelFactor; if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { - fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector); } if (fresnelOutput1 == 1u) { @@ -676,9 +674,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } -fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], - texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], - sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d_array texLightingLut [[texture(3)]], texture1d_array texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; // HACK @@ -689,7 +685,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c globals.tevSources[0] = in.color; if (lightingEnabled) { - calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]); } else { globals.tevSources[1] = float4(0.0); globals.tevSources[2] = float4(0.0); @@ -729,13 +725,13 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; if (enable_fog) { - bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; - float fog_index = flip_depth ? 1.0 - in.position.z : in.position.z; - fog_index *= 128.0; - float clamped_index = clamp(floor(fog_index), 0.0, 127.0); - float delta = fog_index - clamped_index; - float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; - float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z; + fogIndex *= 128.0; + float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0); + float delta = fogIndex - clampedIndex; + float2 value = texFogLut.read(clampedIndex, lutSlices.y).rg; + float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0); uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); @@ -743,9 +739,9 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; - float3 fog_color = float3(r, g, b); + float3 fogColor = float3(r, g, b); - color.rgb = mix(fog_color, color.rgb, fog_factor); + color.rgb = mix(fogColor, color.rgb, fogFactor); } // Perform alpha test From 272c24d8e44c0a46bd1f63bf5d5a9d30bcc5a663 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 31 Oct 2024 13:45:05 +0100 Subject: [PATCH 08/14] don't bind resources unnecessarily --- CMakeLists.txt | 11 +++- include/renderer_mtl/mtl_command_encoder.hpp | 58 ++++++++++++++++++++ include/renderer_mtl/mtl_common.hpp | 6 ++ include/renderer_mtl/objc_helper.hpp | 2 +- include/renderer_mtl/renderer_mtl.hpp | 7 ++- src/core/renderer_mtl/renderer_mtl.cpp | 37 +++++++------ src/host_shaders/metal_blit.metal | 29 ++++++++++ src/host_shaders/metal_shaders.metal | 19 ------- 8 files changed, 128 insertions(+), 41 deletions(-) create mode 100644 include/renderer_mtl/mtl_command_encoder.hpp create mode 100644 include/renderer_mtl/mtl_common.hpp create mode 100644 src/host_shaders/metal_blit.metal diff --git a/CMakeLists.txt b/CMakeLists.txt index 854b9b9e8..d25973c8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -483,6 +483,8 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp include/renderer_mtl/mtl_lut_texture.hpp + include/renderer_mtl/mtl_command_encoder.hpp + include/renderer_mtl/mtl_common.hpp include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp ) @@ -494,7 +496,8 @@ if(ENABLE_METAL AND APPLE) src/core/renderer_mtl/mtl_lut_texture.cpp src/core/renderer_mtl/objc_helper.mm src/host_shaders/metal_shaders.metal - src/host_shaders/metal_copy_to_lut_texture.metal + src/host_shaders/metal_blit.metal + #src/host_shaders/metal_copy_to_lut_texture.metal ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) @@ -520,7 +523,8 @@ if(ENABLE_METAL AND APPLE) endfunction() add_metal_shader(metal_shaders) - add_metal_shader(metal_copy_to_lut_texture) + add_metal_shader(metal_blit) + #add_metal_shader(metal_copy_to_lut_texture) add_custom_target( compile_msl_shaders @@ -532,7 +536,8 @@ if(ENABLE_METAL AND APPLE) NAMESPACE RendererMTL WHENCE "src/host_shaders/" "src/host_shaders/metal_shaders.metallib" - "src/host_shaders/metal_copy_to_lut_texture.metallib" + "src/host_shaders/metal_blit.metallib" + #"src/host_shaders/metal_copy_to_lut_texture.metallib" ) add_dependencies(resources_renderer_mtl compile_msl_shaders) diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp new file mode 100644 index 000000000..be66699d0 --- /dev/null +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include + +namespace Metal { + +struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; +}; + +class CommandEncoder { +public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; + + // Reset the render state + renderState = RenderState{}; + } + + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } + + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } + + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } + + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } + +private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + + RenderState renderState; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_common.hpp b/include/renderer_mtl/mtl_common.hpp new file mode 100644 index 000000000..a148520fb --- /dev/null +++ b/include/renderer_mtl/mtl_common.hpp @@ -0,0 +1,6 @@ +#pragma once + +#include + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp index 91756d241..7d0e86464 100644 --- a/include/renderer_mtl/objc_helper.hpp +++ b/include/renderer_mtl/objc_helper.hpp @@ -2,7 +2,7 @@ #include -#include +#include "mtl_common.hpp" namespace Metal { diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index e28b63b46..6b356896c 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -11,6 +11,7 @@ #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" #include "mtl_lut_texture.hpp" +#include "mtl_command_encoder.hpp" // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -46,6 +47,8 @@ class RendererMTL final : public Renderer { MTL::Device* device; MTL::CommandQueue* commandQueue; + Metal::CommandEncoder commandEncoder; + // Libraries MTL::Library* library; @@ -69,7 +72,7 @@ class RendererMTL final : public Renderer { // Pipelines MTL::RenderPipelineState* displayPipeline; - MTL::RenderPipelineState* copyToLutTexturePipeline; + //MTL::RenderPipelineState* copyToLutTexturePipeline; // Clears std::map colorClearOps; @@ -177,7 +180,7 @@ class RendererMTL final : public Renderer { Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); - void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder); void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index bf2cdab11..8401eecb1 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -165,7 +165,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Load shaders auto mtlResources = cmrc::RendererMTL::get_filesystem(); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); - MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); + //MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // Display MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); @@ -188,8 +189,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { fragmentDisplayFunction->release(); // Blit - MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); - MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); @@ -255,6 +256,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); // Copy to LUT texture + /* MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); @@ -279,6 +281,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { } copyToLutTexturePipelineDescriptor->release(); vertexCopyToLutTextureFunction->release(); + */ // Depth stencil cache depthStencilCache.set(device); @@ -293,7 +296,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { depthStencilDescriptor->release(); // Release - copyToLutTextureLibrary->release(); + blitLibrary->release(); + //copyToLutTextureLibrary->release(); } void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { @@ -528,8 +532,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetRenderPipelineState(pipeline); - renderCommandEncoder->setDepthStencilState(depthStencilState); + commandEncoder.setRenderPipelineState(pipeline); + commandEncoder.setDepthStencilState(depthStencilState); // If size is < 4KB, use inline vertex data, otherwise use a buffer if (vertices.size_bytes() < 4 * 1024) { renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); @@ -566,7 +570,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); @@ -589,7 +593,7 @@ void RendererMTL::deinitGraphicsContext() { delete lutFogTexture; // Release - copyToLutTexturePipeline->release(); + //copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); nullTexture->release(); @@ -687,7 +691,7 @@ void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { encoder->setFragmentBytes(&envState, sizeof(envState), 1); } -void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { +void RendererMTL::bindTexturesToSlots() { static constexpr std::array ioBases = { PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, @@ -696,8 +700,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - encoder->setFragmentTexture(nullTexture, i); - encoder->setFragmentSamplerState(nearestSampler, i); + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); continue; } @@ -713,8 +717,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { if (addr != 0) [[likely]] { Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); auto tex = getTexture(targetTex); - encoder->setFragmentTexture(tex.texture, i); - encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + commandEncoder.setFragmentTexture(tex.texture, i); + commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { // TODO: log } @@ -811,7 +815,7 @@ void RendererMTL::textureCopyImpl( Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; auto blitPipeline = blitPipelineCache.get(hash); - renderCommandEncoder->setRenderPipelineState(blitPipeline); + commandEncoder.setRenderPipelineState(blitPipeline); // Viewport renderCommandEncoder->setViewport(MTL::Viewport{ @@ -824,8 +828,8 @@ void RendererMTL::textureCopyImpl( // Bind resources renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); - renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0); - renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0)); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } @@ -838,6 +842,7 @@ void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassD renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); // Bind persistent resources diff --git a/src/host_shaders/metal_blit.metal b/src/host_shaders/metal_blit.metal new file mode 100644 index 000000000..31b94ec44 --- /dev/null +++ b/src/host_shaders/metal_blit.metal @@ -0,0 +1,29 @@ +#include +using namespace metal; + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; + out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]) { + return tex.sample(samplr, in.uv); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index c2c1799fa..18c310f77 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -32,25 +32,6 @@ fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d return tex.sample(samplr, in.uv); } -struct NDCViewport { - float2 offset; - float2 scale; -}; - -vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { - BasicVertexOut out; - out.uv = float2((vid << 1) & 2, vid & 2); - out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); - out.position.y = -out.position.y; - out.uv = out.uv * viewport.scale + viewport.offset; - - return out; -} - -fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { - return tex.sample(samplr, in.uv); -} - struct PicaRegs { uint regs[0x200 - 0x48]; From abe0709a81618deeda53845b59600f6b347f5a0a Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 1 Nov 2024 09:05:44 +0100 Subject: [PATCH 09/14] don't hardcode window size --- src/panda_sdl/frontend_sdl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 4bfa0087d..1aa79e879 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -88,7 +88,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_METAL if (config.rendererType == RendererType::Metal) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); From 4cc62d487084d2686088058a623078027196272c Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 5 Nov 2024 20:01:20 +0100 Subject: [PATCH 10/14] use saved window position --- src/panda_sdl/frontend_sdl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 989789549..01f48acd4 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -93,7 +93,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_METAL if (config.rendererType == RendererType::Metal) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); From 49b65242b900c9463eefca1077e2f627b43969af Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:09:12 +0200 Subject: [PATCH 11/14] First Metal cleanup & formatting pass --- include/renderer_gl/surface_cache.hpp | 2 - .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 130 ++-- include/renderer_mtl/mtl_command_encoder.hpp | 104 ++- .../renderer_mtl/mtl_depth_stencil_cache.hpp | 148 ++--- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 320 +++++----- include/renderer_mtl/mtl_lut_texture.hpp | 5 - include/renderer_mtl/mtl_render_target.hpp | 157 +++-- include/renderer_mtl/mtl_texture.hpp | 122 ++-- .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 144 ++--- include/renderer_mtl/objc_helper.hpp | 10 +- include/renderer_mtl/pica_to_mtl.hpp | 297 +++++---- include/renderer_mtl/renderer_mtl.hpp | 180 +++--- src/core/renderer_gl/etc1.cpp | 3 +- src/core/renderer_mtl/mtl_etc1.cpp | 222 ++++--- src/core/renderer_mtl/mtl_lut_texture.cpp | 53 +- src/core/renderer_mtl/mtl_texture.cpp | 604 +++++++++--------- src/core/renderer_mtl/renderer_mtl.cpp | 80 ++- 17 files changed, 1275 insertions(+), 1306 deletions(-) diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 7346fd11a..fb7c71a51 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,6 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - //static_assert(std::is_same() || std::is_same() || - // std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp index 264226353..02e075b28 100644 --- a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -7,69 +7,67 @@ using namespace PICA; namespace Metal { - -struct BlitPipelineHash { - // Formats - ColorFmt colorFmt; - DepthFmt depthFmt; -}; - -// This pipeline only caches the pipeline with all of its color and depth attachment variations -class BlitPipelineCache { -public: - BlitPipelineCache() = default; - - ~BlitPipelineCache() { - reset(); - vertexFunction->release(); - fragmentFunction->release(); - } - - void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { - device = dev; - vertexFunction = vert; - fragmentFunction = frag; - } - - MTL::RenderPipelineState* get(BlitPipelineHash hash) { - u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; - auto& pipeline = pipelineCache[intHash]; - if (!pipeline) { - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexFunction); - desc->setFragmentFunction(fragmentFunction); - - auto colorAttachment = desc->colorAttachments()->object(0); - colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); - - desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); - - NS::Error* error = nullptr; - desc->setLabel(toNSString("Blit pipeline")); - pipeline = device->newRenderPipelineState(desc, &error); - if (error) { - Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - - desc->release(); - } - - return pipeline; - } - - void reset() { - for (auto& pair : pipelineCache) { - pair.second->release(); - } - pipelineCache.clear(); - } - -private: - std::map pipelineCache; - - MTL::Device* device; - MTL::Function* vertexFunction; - MTL::Function* fragmentFunction; -}; - -} // namespace Metal + struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; + }; + + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class BlitPipelineCache { + public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } + + private: + std::map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp index be66699d0..562e6b79d 100644 --- a/include/renderer_mtl/mtl_command_encoder.hpp +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -3,56 +3,54 @@ #include namespace Metal { - -struct RenderState { - MTL::RenderPipelineState* renderPipelineState = nullptr; - MTL::DepthStencilState* depthStencilState = nullptr; - MTL::Texture* textures[3] = {nullptr}; - MTL::SamplerState* samplerStates[3] = {nullptr}; -}; - -class CommandEncoder { -public: - void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { - renderCommandEncoder = rce; - - // Reset the render state - renderState = RenderState{}; - } - - // Resource binding - void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { - if (renderPipelineState != renderState.renderPipelineState) { - renderCommandEncoder->setRenderPipelineState(renderPipelineState); - renderState.renderPipelineState = renderPipelineState; - } - } - - void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { - if (depthStencilState != renderState.depthStencilState) { - renderCommandEncoder->setDepthStencilState(depthStencilState); - renderState.depthStencilState = depthStencilState; - } - } - - void setFragmentTexture(MTL::Texture* texture, u32 index) { - if (texture != renderState.textures[index]) { - renderCommandEncoder->setFragmentTexture(texture, index); - renderState.textures[index] = texture; - } - } - - void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { - if (samplerState != renderState.samplerStates[index]) { - renderCommandEncoder->setFragmentSamplerState(samplerState, index); - renderState.samplerStates[index] = samplerState; - } - } - -private: - MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; - - RenderState renderState; -}; - -} // namespace Metal + struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; + }; + + class CommandEncoder { + public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; + + // Reset the render state + renderState = RenderState{}; + } + + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } + + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } + + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } + + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } + + private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + + RenderState renderState; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp index 90721b700..8f7256a94 100644 --- a/include/renderer_mtl/mtl_depth_stencil_cache.hpp +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -7,80 +7,74 @@ using namespace PICA; namespace Metal { - -struct DepthStencilHash { - bool depthStencilWrite; - u8 depthFunc; - u32 stencilConfig; - u16 stencilOpConfig; -}; - -class DepthStencilCache { -public: - DepthStencilCache() = default; - - ~DepthStencilCache() { - reset(); - } - - void set(MTL::Device* dev) { - device = dev; - } - - MTL::DepthStencilState* get(DepthStencilHash hash) { - u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; - auto& depthStencilState = depthStencilCache[intHash]; - if (!depthStencilState) { - MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); - desc->setDepthWriteEnabled(hash.depthStencilWrite); - desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); - - const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); - MTL::StencilDescriptor* stencilDesc = nullptr; - if (stencilEnable) { - const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); - const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); - - const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; - - const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); - const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); - const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); - - stencilDesc = MTL::StencilDescriptor::alloc()->init(); - stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); - stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); - stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); - stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); - stencilDesc->setReadMask(stencilRefMask); - stencilDesc->setWriteMask(stencilBufferMask); - - desc->setFrontFaceStencil(stencilDesc); - desc->setBackFaceStencil(stencilDesc); - } - - depthStencilState = device->newDepthStencilState(desc); - - desc->release(); - if (stencilDesc) { - stencilDesc->release(); - } - } - - return depthStencilState; - } - - void reset() { - for (auto& pair : depthStencilCache) { - pair.second->release(); - } - depthStencilCache.clear(); - } - -private: - std::map depthStencilCache; - - MTL::Device* device; -}; - -} // namespace Metal + struct DepthStencilHash { + u32 stencilConfig; + u16 stencilOpConfig; + bool depthStencilWrite; + u8 depthFunc; + }; + + class DepthStencilCache { + public: + DepthStencilCache() = default; + + ~DepthStencilCache() { reset(); } + + void set(MTL::Device* dev) { device = dev; } + + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = + ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + + const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); + + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } + + depthStencilState = device->newDepthStencilState(desc); + + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } + + return depthStencilState; + } + + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } + + private: + std::map depthStencilCache; + MTL::Device* device; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index c5105a13c..ace324fee 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -7,171 +7,155 @@ using namespace PICA; namespace Metal { - -struct DrawFragmentFunctionHash { - bool lightingEnabled; // 1 bit - u8 lightingNumLights; // 3 bits - u32 lightingConfig1; // 32 bits (TODO: check this) - // | ref | func | on | - u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) -}; - -//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { -// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == r.lightingNumLights) && -// (l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl)); -//} - -inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { - if (!l.lightingEnabled && r.lightingEnabled) return true; - if (l.lightingNumLights < r.lightingNumLights) return true; - if (l.lightingConfig1 < r.lightingConfig1) return true; - if (l.alphaControl < r.alphaControl) return true; - - return false; -} - -struct DrawPipelineHash { // 56 bits - // Formats - ColorFmt colorFmt; // 3 bits - DepthFmt depthFmt; // 3 bits - - // Blending - bool blendEnabled; // 1 bit - // | functions | aeq | ceq | - u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) - u8 colorWriteMask; // 4 bits - - DrawFragmentFunctionHash fragHash; -}; - -//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { -// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && -// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && -// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); -//} - -inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { - if ((u32)l.colorFmt < (u32)r.colorFmt) return true; - if ((u32)l.depthFmt < (u32)r.depthFmt) return true; - if (!l.blendEnabled && r.blendEnabled) return true; - if (l.blendControl < r.blendControl) return true; - if (l.colorWriteMask < r.colorWriteMask) return true; - if (l.fragHash < r.fragHash) return true; - - return false; -} - -// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices -#define VERTEX_BUFFER_BINDING_INDEX 30 - -// This pipeline only caches the pipeline with all of its color and depth attachment variations -class DrawPipelineCache { -public: - DrawPipelineCache() = default; - - ~DrawPipelineCache() { - reset(); - vertexDescriptor->release(); - vertexFunction->release(); - } - - void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { - device = dev; - library = lib; - vertexFunction = vert; - vertexDescriptor = vertDesc; - } - - MTL::RenderPipelineState* get(DrawPipelineHash hash) { - //u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); - //u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; - auto& pipeline = pipelineCache[hash]; - if (!pipeline) { - auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; - if (!fragmentFunction) { - MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); - constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); - constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); - constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); - constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); - - NS::Error* error = nullptr; - fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); - if (error) { - Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - constants->release(); - } - - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexFunction); - desc->setFragmentFunction(fragmentFunction); - desc->setVertexDescriptor(vertexDescriptor); - - auto colorAttachment = desc->colorAttachments()->object(0); - colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); - MTL::ColorWriteMask writeMask = 0; - if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; - if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; - if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; - if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; - colorAttachment->setWriteMask(writeMask); - if (hash.blendEnabled) { - const u8 rgbEquation = hash.blendControl & 0x7; - const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); - - // Get blending functions - const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); - const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); - const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); - const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); - - colorAttachment->setBlendingEnabled(true); - colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); - colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); - colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); - colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); - colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); - colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); - } - - MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); - desc->setDepthAttachmentPixelFormat(depthFormat); - if (hash.depthFmt == DepthFmt::Depth24Stencil8) - desc->setStencilAttachmentPixelFormat(depthFormat); - - NS::Error* error = nullptr; - desc->setLabel(toNSString("Draw pipeline")); - pipeline = device->newRenderPipelineState(desc, &error); - if (error) { - Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - - desc->release(); - } - - return pipeline; - } - - void reset() { - for (auto& pair : pipelineCache) { - pair.second->release(); - } - pipelineCache.clear(); - for (auto& pair : fragmentFunctionCache) { - pair.second->release(); - } - fragmentFunctionCache.clear(); - } - -private: - std::map pipelineCache; - std::map fragmentFunctionCache; - - MTL::Device* device; - MTL::Library* library; - MTL::Function* vertexFunction; - MTL::VertexDescriptor* vertexDescriptor; -}; - -} // namespace Metal + struct DrawFragmentFunctionHash { + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u32 lightingConfig1; // 32 bits (TODO: check this) + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) + }; + + inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; + + return false; + } + + struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits + + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits + + DrawFragmentFunctionHash fragHash; + }; + + inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; + + return false; + } + + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class DrawPipelineCache { + public: + DrawPipelineCache() = default; + + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } + + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } + + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + auto& pipeline = pipelineCache[hash]; + + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } + + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + desc->setVertexDescriptor(vertexDescriptor); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } + + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } + + private: + std::map pipelineCache; + std::map fragmentFunctionCache; + + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; + }; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp index 162bfe25b..531dc73c0 100644 --- a/include/renderer_mtl/mtl_lut_texture.hpp +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -8,17 +8,12 @@ class LutTexture { public: LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); ~LutTexture(); - u32 getNextIndex(); - // Getters MTL::Texture* getTexture() { return texture; } - u32 getCurrentIndex() { return currentIndex; } - private: MTL::Texture* texture; - u32 currentIndex = 0; }; diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp index 73be45f4d..8f80ea647 100644 --- a/include/renderer_mtl/mtl_render_target.hpp +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -1,92 +1,91 @@ #pragma once +#include #include #include -#include + #include "boost/icl/interval.hpp" #include "helpers.hpp" #include "math_util.hpp" +#include "objc_helper.hpp" #include "opengl.hpp" #include "pica_to_mtl.hpp" -#include "objc_helper.hpp" template using Interval = boost::icl::right_open_interval; namespace Metal { - -template -struct RenderTarget { - MTL::Device* device; - - u32 location; - Format_t format; - OpenGL::uvec2 size; - bool valid; - - // Range of VRAM taken up by buffer - Interval range; - - MTL::Texture* texture = nullptr; - - RenderTarget() : valid(false) {} - - RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) - : device(dev), location(loc), format(format), size({x, y}), valid(valid) { - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } - - Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { - const u32 startOffset = (inputAddress - location) / sizePerPixel(format); - const u32 x0 = (startOffset % (size.x() * 8)) / 8; - const u32 y0 = (startOffset / (size.x() * 8)) * 8; - return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; - } - - // For 2 textures to "match" we only care about their locations, formats, and dimensions to match - // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture - bool matches(RenderTarget& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } - - void allocate() { - MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; - if (std::is_same::value) { - pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); - } else if (std::is_same::value) { - pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); - } else { - panic("Invalid format type"); - } - - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); - descriptor->setTextureType(MTL::TextureType2D); - descriptor->setPixelFormat(pixelFormat); - descriptor->setWidth(size.u()); - descriptor->setHeight(size.v()); - descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); - descriptor->setStorageMode(MTL::StorageModePrivate); - texture = device->newTexture(descriptor); - texture->setLabel(toNSString(std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); - descriptor->release(); - } - - void free() { - valid = false; - - if (texture) { - texture->release(); - } - } - - u64 sizeInBytes() { - return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); - } -}; - -typedef RenderTarget ColorRenderTarget; -typedef RenderTarget DepthStencilRenderTarget; - -} // namespace Metal + template + struct RenderTarget { + MTL::Device* device; + + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + MTL::Texture* texture = nullptr; + + RenderTarget() : valid(false) {} + + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + + std::to_string(size.v()) + )); + descriptor->release(); + } + + void free() { + valid = false; + + if (texture) { + texture->release(); + } + } + + u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); } + }; + + using ColorRenderTarget = RenderTarget; + using DepthStencilRenderTarget = RenderTarget; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 9cec268dc..51cb4c4b3 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -1,8 +1,9 @@ #pragma once +#include #include #include -#include + #include "PICA/regs.hpp" #include "boost/icl/interval.hpp" #include "helpers.hpp" @@ -10,69 +11,64 @@ #include "opengl.hpp" #include "renderer_mtl/pica_to_mtl.hpp" + template using Interval = boost::icl::right_open_interval; namespace Metal { - -struct Texture { - MTL::Device* device; - - u32 location; - u32 config; // Magnification/minification filter, wrapping configs, etc - PICA::TextureFmt format; - OpenGL::uvec2 size; - bool valid; - - // Range of VRAM taken up by buffer - Interval range; - - PICA::PixelFormatInfo formatInfo; - MTL::Texture* texture = nullptr; - MTL::SamplerState* sampler = nullptr; - - Texture() : valid(false) {} - - Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) - : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { - - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } - - // For 2 textures to "match" we only care about their locations, formats, and dimensions to match - // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture - bool matches(Texture& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } - - void allocate(); - void setNewConfig(u32 newConfig); - void decodeTexture(std::span data); - void free(); - u64 sizeInBytes(); - - u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - - // Get the morton interleave offset of a texel based on its U and V values - static u32 mortonInterleave(u32 u, u32 v); - // Get the byte offset of texel (u, v) in the texture - static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); - static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); - - // Returns the format of this texture as a string - std::string_view formatToString() { - return PICA::textureFormatToString(format); - } - - // Returns the texel at coordinates (u, v) of an ETC1(A4) texture - // TODO: Make hasAlpha a template parameter - u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); - u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); -}; - -} // namespace Metal + struct Texture { + MTL::Device* device; + + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; + + Texture() : valid(false) {} + + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); + + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + + // Returns the format of this texture as a string + std::string_view formatToString() { return PICA::textureFormatToString(format); } + + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index cc552477a..d53af2836 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -5,76 +5,74 @@ using namespace PICA; namespace Metal { - -struct BufferHandle { - MTL::Buffer* buffer; - size_t offset; -}; - -// 128MB buffer for caching vertex data -#define CACHE_BUFFER_SIZE 128 * 1024 * 1024 - -class VertexBufferCache { -public: - VertexBufferCache() = default; - - ~VertexBufferCache() { - endFrame(); - buffer->release(); - } - - void set(MTL::Device* dev) { - device = dev; - create(); - } - - void endFrame() { - ptr = 0; - for (auto buffer : additionalAllocations) { - buffer->release(); - } - additionalAllocations.clear(); - } - - BufferHandle get(const void* data, size_t size) { - // If the vertex buffer is too large, just create a new one - if (ptr + size > CACHE_BUFFER_SIZE) { - MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); - newBuffer->setLabel(toNSString("Additional vertex buffer")); - additionalAllocations.push_back(newBuffer); - Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); - - return BufferHandle{newBuffer, 0}; - } - - // Copy the data into the buffer - memcpy((char*)buffer->contents() + ptr, data, size); - - size_t oldPtr = ptr; - ptr += size; - - return BufferHandle{buffer, oldPtr}; - } - - void reset() { - endFrame(); - if (buffer) { - buffer->release(); - create(); - } - } - -private: - MTL::Buffer* buffer = nullptr; - size_t ptr = 0; - std::vector additionalAllocations; - - MTL::Device* device; - - void create() { - buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); - buffer->setLabel(toNSString("Shared vertex buffer")); - } -}; - -} // namespace Metal + struct BufferHandle { + MTL::Buffer* buffer; + size_t offset; + }; + + class VertexBufferCache { + // 128MB buffer for caching vertex data + static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024; + + public: + VertexBufferCache() = default; + + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } + + void set(MTL::Device* dev) { + device = dev; + create(); + } + + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } + + BufferHandle get(const void* data, size_t size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + + return BufferHandle{newBuffer, 0}; + } + + // Copy the data into the buffer + memcpy((char*)buffer->contents() + ptr, data, size); + + size_t oldPtr = ptr; + ptr += size; + + return BufferHandle{buffer, oldPtr}; + } + + void reset() { + endFrame(); + if (buffer) { + buffer->release(); + create(); + } + } + + private: + MTL::Buffer* buffer = nullptr; + size_t ptr = 0; + std::vector additionalAllocations; + + MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } + }; +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp index 7d0e86464..86992f1d2 100644 --- a/include/renderer_mtl/objc_helper.hpp +++ b/include/renderer_mtl/objc_helper.hpp @@ -5,12 +5,8 @@ #include "mtl_common.hpp" namespace Metal { - -dispatch_data_t createDispatchData(const void* data, size_t size); - -} // namespace Metal + dispatch_data_t createDispatchData(const void* data, size_t size); +} // namespace Metal // Cast from std::string to NS::String* -inline NS::String* toNSString(const std::string& str) { - return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); -} +inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); } \ No newline at end of file diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index de76dc3b5..9234c7485 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -1,155 +1,154 @@ #pragma once #include + #include "PICA/regs.hpp" -namespace PICA { -struct PixelFormatInfo { - MTL::PixelFormat pixelFormat; - size_t bytesPerTexel; -}; - -constexpr PixelFormatInfo pixelFormatInfos[14] = { - {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 - {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 - {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 - {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 - {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 - {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 - {MTL::PixelFormatRG8Unorm, 2}, // RG8 - {MTL::PixelFormatRGBA8Unorm, 4}, // I8 - {MTL::PixelFormatA8Unorm, 1}, // A8 - {MTL::PixelFormatABGR4Unorm, 2}, // IA4 - {MTL::PixelFormatABGR4Unorm, 2}, // I4 - {MTL::PixelFormatA8Unorm, 1}, // A4 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 -}; - -inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { - return pixelFormatInfos[static_cast(format)]; -} - -inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { - switch (format) { - case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? - case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? - case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; - } -} - -inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { - switch (format) { - case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; - case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; - case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats - // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead - case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; - } -} - -inline MTL::CompareFunction toMTLCompareFunc(u8 func) { - switch (func) { - case 0: return MTL::CompareFunctionNever; - case 1: return MTL::CompareFunctionAlways; - case 2: return MTL::CompareFunctionEqual; - case 3: return MTL::CompareFunctionNotEqual; - case 4: return MTL::CompareFunctionLess; - case 5: return MTL::CompareFunctionLessEqual; - case 6: return MTL::CompareFunctionGreater; - case 7: return MTL::CompareFunctionGreaterEqual; - default: panic("Unknown compare function %u", func); - } - - return MTL::CompareFunctionAlways; -} - -inline MTL::BlendOperation toMTLBlendOperation(u8 op) { - switch (op) { - case 0: return MTL::BlendOperationAdd; - case 1: return MTL::BlendOperationSubtract; - case 2: return MTL::BlendOperationReverseSubtract; - case 3: return MTL::BlendOperationMin; - case 4: return MTL::BlendOperationMax; - case 5: return MTL::BlendOperationAdd; // Unused (same as 0) - case 6: return MTL::BlendOperationAdd; // Unused (same as 0) - case 7: return MTL::BlendOperationAdd; // Unused (same as 0) - default: panic("Unknown blend operation %u", op); - } - - return MTL::BlendOperationAdd; -} - -inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { - switch (factor) { - case 0: return MTL::BlendFactorZero; - case 1: return MTL::BlendFactorOne; - case 2: return MTL::BlendFactorSourceColor; - case 3: return MTL::BlendFactorOneMinusSourceColor; - case 4: return MTL::BlendFactorDestinationColor; - case 5: return MTL::BlendFactorOneMinusDestinationColor; - case 6: return MTL::BlendFactorSourceAlpha; - case 7: return MTL::BlendFactorOneMinusSourceAlpha; - case 8: return MTL::BlendFactorDestinationAlpha; - case 9: return MTL::BlendFactorOneMinusDestinationAlpha; - case 10: return MTL::BlendFactorBlendColor; - case 11: return MTL::BlendFactorOneMinusBlendColor; - case 12: return MTL::BlendFactorBlendAlpha; - case 13: return MTL::BlendFactorOneMinusBlendAlpha; - case 14: return MTL::BlendFactorSourceAlphaSaturated; - case 15: return MTL::BlendFactorOne; // Undocumented - default: panic("Unknown blend factor %u", factor); - } - - return MTL::BlendFactorOne; -} - -inline MTL::StencilOperation toMTLStencilOperation(u8 op) { - switch (op) { - case 0: return MTL::StencilOperationKeep; - case 1: return MTL::StencilOperationZero; - case 2: return MTL::StencilOperationReplace; - case 3: return MTL::StencilOperationIncrementClamp; - case 4: return MTL::StencilOperationDecrementClamp; - case 5: return MTL::StencilOperationInvert; - case 6: return MTL::StencilOperationIncrementWrap; - case 7: return MTL::StencilOperationDecrementWrap; - default: panic("Unknown stencil operation %u", op); - } - - return MTL::StencilOperationKeep; -} - -inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { - switch (primType) { - case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; - case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; - case PrimType::TriangleFan: - Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); - return MTL::PrimitiveTypeTriangle; - case PrimType::GeometryPrimitive: - //Helpers::warn("Geometry primitives are not yet, using triangles instead"); - return MTL::PrimitiveTypeTriangle; - } -} - -inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { - switch (addrMode) { - case 0: return MTL::SamplerAddressModeClampToEdge; - case 1: return MTL::SamplerAddressModeClampToBorderColor; - case 2: return MTL::SamplerAddressModeRepeat; - case 3: return MTL::SamplerAddressModeMirrorRepeat; - case 4: return MTL::SamplerAddressModeClampToEdge; - case 5: return MTL::SamplerAddressModeClampToBorderColor; - case 6: return MTL::SamplerAddressModeRepeat; - case 7: return MTL::SamplerAddressModeRepeat; - default: panic("Unknown sampler address mode %u", addrMode); - } - - return MTL::SamplerAddressModeClampToEdge; -} - -} // namespace PICA +namespace PICA { + struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; + }; + + constexpr PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 + }; + + inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast(format)]; } + + inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? + case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } + } + + inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: + return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } + } + + inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: panic("Unknown compare function %u", func); + } + + return MTL::CompareFunctionAlways; + } + + inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: panic("Unknown blend operation %u", op); + } + + return MTL::BlendOperationAdd; + } + + inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5: return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: panic("Unknown blend factor %u", factor); + } + + return MTL::BlendFactorOne; + } + + inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: panic("Unknown stencil operation %u", op); + } + + return MTL::StencilOperationKeep; + } + + inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + // Helpers::warn("Geometry primitives are not yet, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + } + } + + inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: panic("Unknown sampler address mode %u", addrMode); + } + + return MTL::SamplerAddressModeClampToEdge; + } +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 6b356896c..bd5c3bf11 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -3,15 +3,16 @@ #include #include -#include "renderer.hpp" -#include "mtl_texture.hpp" -#include "mtl_render_target.hpp" #include "mtl_blit_pipeline_cache.hpp" -#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_command_encoder.hpp" #include "mtl_depth_stencil_cache.hpp" -#include "mtl_vertex_buffer_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" #include "mtl_lut_texture.hpp" -#include "mtl_command_encoder.hpp" +#include "mtl_render_target.hpp" +#include "mtl_texture.hpp" +#include "mtl_vertex_buffer_cache.hpp" +#include "renderer.hpp" + // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -19,7 +20,7 @@ class GPU; struct Color4 { - float r, g, b, a; + float r, g, b, a; }; class RendererMTL final : public Renderer { @@ -72,7 +73,7 @@ class RendererMTL final : public Renderer { // Pipelines MTL::RenderPipelineState* displayPipeline; - //MTL::RenderPipelineState* copyToLutTexturePipeline; + // MTL::RenderPipelineState* copyToLutTexturePipeline; // Clears std::map colorClearOps; @@ -95,93 +96,112 @@ class RendererMTL final : public Renderer { } void endRenderPass() { - if (renderCommandEncoder) { - renderCommandEncoder->endEncoding(); - renderCommandEncoder = nullptr; - } + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } } - void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); + void beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr + ); void commitCommandBuffer() { - if (renderCommandEncoder) { - renderCommandEncoder->endEncoding(); - renderCommandEncoder->release(); - renderCommandEncoder = nullptr; - } - if (commandBuffer) { - commandBuffer->commit(); - // HACK - commandBuffer->waitUntilCompleted(); - commandBuffer->release(); - commandBuffer = nullptr; - } - } - - template - inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { - bool beginRenderPass = (renderPassDescriptor == nullptr); - if (!renderPassDescriptor) { - renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); - } - - AttachmentT* attachment = getAttachment(renderPassDescriptor); + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } + + template + inline void clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, + SetClearDataT setClearData + ) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } + + AttachmentT* attachment = getAttachment(renderPassDescriptor); attachment->setTexture(texture); setClearData(attachment, clearData); attachment->setLoadAction(MTL::LoadActionClear); attachment->setStoreAction(MTL::StoreActionStore); if (beginRenderPass) { - if (std::is_same::value) - beginRenderPassIfNeeded(renderPassDescriptor, true, texture); - else - beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); } - } - - template - inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { - auto it = clearOps.find(texture); - if (it != clearOps.end()) { - clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); - clearOps.erase(it); - return true; - } - - if (renderPassDescriptor) { - AttachmentT* attachment = getAttachment(renderPassDescriptor); - attachment->setTexture(texture); - attachment->setLoadAction(MTL::LoadActionLoad); - attachment->setStoreAction(MTL::StoreActionStore); - } - - return false; - } - - bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, colorClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, [](auto attachment, auto& color) { - attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); - }); - } - - bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, depthClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, [](auto attachment, auto& depth) { - attachment->setClearDepth(depth); - }); - } - - bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, stencilClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, [](auto attachment, auto& stencil) { - attachment->setClearStencil(stencil); - }); - } - - std::optional getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); + } + + template + inline bool clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, + GetAttachmentT getAttachment, SetClearDataT setClearData + ) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } + + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } + + return false; + } + + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, colorClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, + [](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); } + ); + } + + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, depthClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, + [](auto attachment, auto& depth) { attachment->setClearDepth(depth); } + ); + } + + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, stencilClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, + [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); } + ); + } + + std::optional getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true + ); Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); void bindTexturesToSlots(); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder); - void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); + void textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect + ); }; diff --git a/src/core/renderer_gl/etc1.cpp b/src/core/renderer_gl/etc1.cpp index 8aefd622c..0b4ed1a56 100644 --- a/src/core/renderer_gl/etc1.cpp +++ b/src/core/renderer_gl/etc1.cpp @@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) { u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { // Pixel offset of the 8x8 tile based on u, v and the width of the texture u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) + if (!hasAlpha) { offs >>= 1; + } // In-tile offsets for u/v u &= 7; diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp index a414df3c9..420a60cad 100644 --- a/src/core/renderer_mtl/mtl_etc1.cpp +++ b/src/core/renderer_mtl/mtl_etc1.cpp @@ -1,124 +1,116 @@ #include + #include "colour.hpp" -#include "renderer_mtl/renderer_mtl.hpp" #include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/renderer_mtl.hpp" + using namespace Helpers; namespace Metal { - -static constexpr u32 signExtend3To32(u32 val) { - return (u32)(s32(val) << 29 >> 29); -} - -u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { - // Pixel offset of the 8x8 tile based on u, v and the width of the texture - u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) - offs >>= 1; - - // In-tile offsets for u/v - u &= 7; - v &= 7; - - // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles - // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes - const u32 subTileSize = hasAlpha ? 16 : 8; - const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? - - // In-subtile offsets for u/v - u &= 3; - v &= 3; - offs += subTileSize * subTileIndex; - - u32 alpha; - const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* - - if (hasAlpha) { - // First 64 bits of the 4x4 subtile are alpha data - const u64 alphaData = *ptr++; - alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); - } - else { - alpha = 0xff; // ETC1 without alpha uses ff for every pixel + static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); } - // Next 64 bits of the subtile are colour data - u64 colourData = *ptr; - return decodeETC(alpha, u, v, colourData); -} - -u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { - static constexpr u32 modifiers[8][2] = { - { 2, 8 }, - { 5, 17 }, - { 9, 29 }, - { 13, 42 }, - { 18, 60 }, - { 24, 80 }, - { 33, 106 }, - { 47, 183 }, - }; - - // Parse colour data for 4x4 block - const u32 subindices = getBits<0, 16, u32>(colourData); - const u32 negationFlags = getBits<16, 16, u32>(colourData); - const bool flip = getBit<32>(colourData); - const bool diffMode = getBit<33>(colourData); - - // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits - const u32 tableIndex1 = getBits<37, 3, u32>(colourData); - const u32 tableIndex2 = getBits<34, 3, u32>(colourData); - const u32 texelIndex = u * 4 + v; // Index of the texel in the block - - if (flip) - std::swap(u, v); - - s32 r, g, b; - if (diffMode) { - r = getBits<59, 5, s32>(colourData); - g = getBits<51, 5, s32>(colourData); - b = getBits<43, 5, s32>(colourData); - - if (u >= 2) { - r += signExtend3To32(getBits<56, 3, u32>(colourData)); - g += signExtend3To32(getBits<48, 3, u32>(colourData)); - b += signExtend3To32(getBits<40, 3, u32>(colourData)); - } - - // Expand from 5 to 8 bits per channel - r = Colour::convert5To8Bit(r); - g = Colour::convert5To8Bit(g); - b = Colour::convert5To8Bit(b); - } else { - if (u < 2) { - r = getBits<60, 4, s32>(colourData); - g = getBits<52, 4, s32>(colourData); - b = getBits<44, 4, s32>(colourData); - } else { - r = getBits<56, 4, s32>(colourData); - g = getBits<48, 4, s32>(colourData); - b = getBits<40, 4, s32>(colourData); - } - - // Expand from 4 to 8 bits per channel - r = Colour::convert4To8Bit(r); - g = Colour::convert4To8Bit(g); - b = Colour::convert4To8Bit(b); - } - - const u32 index = (u < 2) ? tableIndex1 : tableIndex2; - s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; - - if (((negationFlags >> texelIndex) & 1) != 0) { - modifier = -modifier; - } - - r = std::clamp(r + modifier, 0, 255); - g = std::clamp(g + modifier, 0, 255); - b = std::clamp(b + modifier, 0, 255); - - return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); -} - -} // namespace Metal + u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) { + offs >>= 1; + } + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? + + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); + } + + u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp index ac4ff6d93..8486a50c0 100644 --- a/src/core/renderer_mtl/mtl_lut_texture.cpp +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -1,32 +1,27 @@ #include "renderer_mtl/renderer_mtl.hpp" namespace Metal { - -constexpr u32 LAYER_COUNT = 1024; - -LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { - MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); - desc->setTextureType(type); - desc->setPixelFormat(pixelFormat); - desc->setWidth(width); - desc->setHeight(height); - desc->setArrayLength(LAYER_COUNT); - desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/); - desc->setStorageMode(MTL::StorageModeShared); - - texture = device->newTexture(desc); - texture->setLabel(toNSString(name)); - desc->release(); -} - -LutTexture::~LutTexture() { - texture->release(); -} - -u32 LutTexture::getNextIndex() { - currentIndex = (currentIndex + 1) % LAYER_COUNT; - - return currentIndex; -} - -} // namespace Metal + static constexpr u32 LAYER_COUNT = 1024; + + LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); + + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); + } + + LutTexture::~LutTexture() { texture->release(); } + + u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + return currentIndex; + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp index b61c55021..149fea26a 100644 --- a/src/core/renderer_mtl/mtl_texture.cpp +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -1,312 +1,308 @@ #include "renderer_mtl/mtl_texture.hpp" -#include "renderer_mtl/objc_helper.hpp" -#include "colour.hpp" + #include +#include "colour.hpp" +#include "renderer_mtl/objc_helper.hpp" + + using namespace Helpers; namespace Metal { + void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + "Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()) + )); + descriptor->release(); + + setNewConfig(config); + } + + // Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on + void Texture::setNewConfig(u32 cfg) { + config = cfg; + + if (sampler) { + sampler->release(); + } + + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); + + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); + } + + void Texture::free() { + valid = false; + + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } + } + + u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); + + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; + + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; + + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: return pixelCount * 2; + + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: return pixelCount; + + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: return pixelCount / 2; + + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; + return tileCount * tileSize; + } + + default: Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } + } + + // u and v are the UVs of the relevant texel + // Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here + // https://en.wikipedia.org/wiki/Z-order_curve + // Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel + // The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 + // As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg + u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; + static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; + + return xOffsets[u & 7] + yOffsets[v & 7]; + } + + // Get the byte offset of texel (u, v) in the texture + u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; + } + + // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte + u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; + } + + u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + // A8 + return alpha; + } + + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; + + // A8 + return alpha; + } + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; + + // RG8 + return (g << 8) | r; + } + + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } + + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } + + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); + + // B5G6R5 + return (r << 11) | (g << 5) | b; + } + + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } + + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); + intensity = getBits<0, 4>(intensity); + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; + } + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; + + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; + + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; + + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } -void Texture::allocate() { - formatInfo = PICA::getPixelFormatInfo(format); - - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); - descriptor->setTextureType(MTL::TextureType2D); - descriptor->setPixelFormat(formatInfo.pixelFormat); - descriptor->setWidth(size.u()); - descriptor->setHeight(size.v()); - descriptor->setUsage(MTL::TextureUsageShaderRead); - descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? - texture = device->newTexture(descriptor); - texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); - descriptor->release(); - - setNewConfig(config); -} - -// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on -void Texture::setNewConfig(u32 cfg) { - config = cfg; - - if (sampler) { - sampler->release(); - } - - const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; - const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; - const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); - const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); - - MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); - samplerDescriptor->setMinFilter(minFilter); - samplerDescriptor->setMagFilter(magFilter); - samplerDescriptor->setSAddressMode(wrapS); - samplerDescriptor->setTAddressMode(wrapT); - - samplerDescriptor->setLabel(toNSString("Sampler")); - sampler = device->newSamplerState(samplerDescriptor); - samplerDescriptor->release(); -} - -void Texture::free() { - valid = false; - - if (texture) { - texture->release(); + void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) { + u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); } - if (sampler) { - sampler->release(); - } -} - -u64 Texture::sizeInBytes() { - u64 pixelCount = u64(size.x()) * u64(size.y()); - - switch (format) { - case PICA::TextureFmt::RGBA8: // 4 bytes per pixel - return pixelCount * 4; - - case PICA::TextureFmt::RGB8: // 3 bytes per pixel - return pixelCount * 3; - - case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel - case PICA::TextureFmt::RGB565: - case PICA::TextureFmt::RGBA4: - case PICA::TextureFmt::RG8: - case PICA::TextureFmt::IA8: - return pixelCount * 2; - - case PICA::TextureFmt::A8: // 1 byte per pixel - case PICA::TextureFmt::I8: - case PICA::TextureFmt::IA4: - return pixelCount; - - case PICA::TextureFmt::I4: // 4 bits per pixel - case PICA::TextureFmt::A4: - return pixelCount / 2; - - case PICA::TextureFmt::ETC1: // Compressed formats - case PICA::TextureFmt::ETC1A4: { - // Number of 4x4 tiles - const u64 tileCount = pixelCount / 16; - // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 - const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; - return tileCount * tileSize; - } - - default: - Helpers::panic("[PICA] Attempted to get size of invalid texture type"); - } -} - -// u and v are the UVs of the relevant texel -// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here -// https://en.wikipedia.org/wiki/Z-order_curve -// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel -// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 -// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg -u32 Texture::mortonInterleave(u32 u, u32 v) { - static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 }; - static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 }; - - return xOffsets[u & 7] + yOffsets[v & 7]; -} - -// Get the byte offset of texel (u, v) in the texture -u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel - - return offset * bytesPerPixel; -} - -// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte -u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel - - return offset / 2; -} - -u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::A4: { - const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); - - // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates - u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); - alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); - - // A8 - return alpha; - } - - case PICA::TextureFmt::A8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 alpha = data[offset]; - - // A8 - return alpha; - } - - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} - -u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::RG8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - constexpr u8 b = 0; - const u8 g = data[offset]; - const u8 r = data[offset + 1]; - - // RG8 - return (g << 8) | r; - } - - case PICA::TextureFmt::RGBA4: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - - u8 alpha = getBits<0, 4, u8>(texel); - u8 b = getBits<4, 4, u8>(texel); - u8 g = getBits<8, 4, u8>(texel); - u8 r = getBits<12, 4, u8>(texel); - - // ABGR4 - return (r << 12) | (g << 8) | (b << 4) | alpha; - } - - case PICA::TextureFmt::RGBA5551: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 2); - const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - - u8 alpha = getBit<0>(texel) ? 0xff : 0; - u8 b = getBits<1, 5, u8>(texel); - u8 g = getBits<6, 5, u8>(texel); - u8 r = getBits<11, 5, u8>(texel); - - // BGR5A1 - return (alpha << 15) | (r << 10) | (g << 5) | b; - } - - case PICA::TextureFmt::RGB565: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 2); - const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - - const u8 b = getBits<0, 5, u8>(texel); - const u8 g = getBits<5, 6, u8>(texel); - const u8 r = getBits<11, 5, u8>(texel); - - // B5G6R5 - return (r << 11) | (g << 5) | b; - } - - case PICA::TextureFmt::IA4: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 texel = data[offset]; - const u8 alpha = texel & 0xf; - const u8 intensity = texel >> 4; - - // ABGR4 - return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; - } - - case PICA::TextureFmt::I4: { - u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); - - // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates - u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); - intensity = getBits<0, 4>(intensity); - - // ABGR4 - return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; - } - - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} - -u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::RGB8: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 3); - const u8 b = data[offset]; - const u8 g = data[offset + 1]; - const u8 r = data[offset + 2]; - - // RGBA8 - return (0xff << 24) | (b << 16) | (g << 8) | r; - } - - case PICA::TextureFmt::RGBA8: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 4); - const u8 alpha = data[offset]; - const u8 b = data[offset + 1]; - const u8 g = data[offset + 2]; - const u8 r = data[offset + 3]; - - // RGBA8 - return (alpha << 24) | (b << 16) | (g << 8) | r; - } - - case PICA::TextureFmt::I8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 intensity = data[offset]; - - // RGBA8 - return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; - } - - case PICA::TextureFmt::IA8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - - // Same as I8 except each pixel gets its own alpha value too - const u8 alpha = data[offset]; - const u8 intensity = data[offset + 1]; - - // RGBA8 - return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; - } - - case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); - case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); - - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} - -void Texture::decodeTexture(std::span data) { - std::vector decoded; - decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); - - // Decode texels line by line - for (u32 v = 0; v < size.v(); v++) { - for (u32 u = 0; u < size.u(); u++) { - if (formatInfo.bytesPerTexel == 1) { - u8 texel = decodeTexelU8(u, v, format, data); - decoded.push_back(texel); - } else if (formatInfo.bytesPerTexel == 2) { - u16 texel = decodeTexelU16(u, v, format, data); - decoded.push_back((texel & 0x00ff) >> 0); - decoded.push_back((texel & 0xff00) >> 8); - } else if (formatInfo.bytesPerTexel == 4) { - u32 texel = decodeTexelU32(u, v, format, data); - decoded.push_back((texel & 0x000000ff) >> 0); - decoded.push_back((texel & 0x0000ff00) >> 8); - decoded.push_back((texel & 0x00ff0000) >> 16); - decoded.push_back((texel & 0xff000000) >> 24); - } else { - Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); - } - } - } - - texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); -} - -} // namespace Metal +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 8401eecb1..a0c1888a6 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -2,9 +2,10 @@ #include #include + #include "renderer_mtl/mtl_lut_texture.hpp" -// HACK +// Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code... #undef NO #include "PICA/gpu.hpp" @@ -14,8 +15,10 @@ using namespace PICA; CMRC_DECLARE(RendererMTL); -const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; -const u32 FOG_LUT_TEXTURE_WIDTH = 128; +static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128; +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30; // HACK: redefinition... PICA::ColorFmt ToColorFormat(u32 format) { @@ -40,6 +43,7 @@ MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) : Renderer(gpu, internalRegs, externalRegs) {} + RendererMTL::~RendererMTL() {} void RendererMTL::reset() { @@ -78,7 +82,7 @@ void RendererMTL::display() { clearColor(nullptr, bottomScreen->get().texture); } - // -------- Draw -------- + // Draw commandBuffer->pushDebugGroup(toNSString("Display")); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); @@ -130,8 +134,6 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { metalLayer->setDevice(device); commandQueue = device->newCommandQueue(); - // -------- Objects -------- - // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType2D); @@ -157,7 +159,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { samplerDescriptor->release(); - lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); + lutLightingTexture = new Metal::LutTexture( + device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture" + ); lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); // -------- Pipelines -------- @@ -166,7 +170,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { auto mtlResources = cmrc::RendererMTL::get_filesystem(); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); - //MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + // MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // Display MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); @@ -295,9 +299,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); depthStencilDescriptor->release(); - // Release blitLibrary->release(); - //copyToLutTextureLibrary->release(); + // copyToLutTextureLibrary->release(); } void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { @@ -592,8 +595,7 @@ void RendererMTL::deinitGraphicsContext() { delete lutLightingTexture; delete lutFogTexture; - // Release - //copyToLutTexturePipeline->release(); + // copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); nullTexture->release(); @@ -700,9 +702,9 @@ void RendererMTL::bindTexturesToSlots() { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - commandEncoder.setFragmentTexture(nullTexture, i); - commandEncoder.setFragmentSamplerState(nearestSampler, i); - continue; + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); + continue; } const size_t ioBase = ioBases[i]; @@ -736,7 +738,9 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { } u32 index = lutLightingTexture->getNextIndex(); - lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); + lutLightingTexture->getTexture()->replaceRegion( + MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 + ); /* endRenderPass(); @@ -768,7 +772,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut = {0.0f}; + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -807,7 +811,8 @@ void RendererMTL::textureCopyImpl( ) { nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); - // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole + // texture bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture); @@ -819,11 +824,13 @@ void RendererMTL::textureCopyImpl( // Viewport renderCommandEncoder->setViewport(MTL::Viewport{ - double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 - }); + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + float srcRectNDC[4] = { - srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), - (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() + srcRect.left / (float)srcFramebuffer.size.u(), + srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), + (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v(), }; // Bind resources @@ -834,25 +841,28 @@ void RendererMTL::textureCopyImpl( renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } -void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { +void RendererMTL::beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture +) { createCommandBufferIfNeeded(); - if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { - endRenderPass(); + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || + (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); - renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); - renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); - commandEncoder.newRenderCommandEncoder(renderCommandEncoder); + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); - // Bind persistent resources + // Bind persistent resources - // LUT texture - renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); - renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); - renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); - lastColorTexture = colorTexture; - lastDepthTexture = depthTexture; + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; } renderPassDescriptor->release(); From 10451a676b9d37b99700bb0d2fbf617fba80211b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:25:24 +0200 Subject: [PATCH 12/14] Metal: Remove padding in DrawFragmentFunctionHash --- include/renderer_mtl/mtl_draw_pipeline_cache.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index ace324fee..47ea53146 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -8,9 +8,9 @@ using namespace PICA; namespace Metal { struct DrawFragmentFunctionHash { + u32 lightingConfig1; // 32 bits (TODO: check this) bool lightingEnabled; // 1 bit u8 lightingNumLights; // 3 bits - u32 lightingConfig1; // 32 bits (TODO: check this) // | ref | func | on | u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) }; From d7e4cf18b56ab4fa2e1cda034268c765fd50442d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:44:41 +0200 Subject: [PATCH 13/14] Metal: IWYU fixes --- include/renderer_mtl/mtl_blit_pipeline_cache.hpp | 1 + include/renderer_mtl/mtl_draw_pipeline_cache.hpp | 1 + include/renderer_mtl/mtl_texture.hpp | 1 - include/renderer_mtl/mtl_vertex_buffer_cache.hpp | 15 ++++++++++----- include/renderer_mtl/pica_to_mtl.hpp | 12 +++++------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp index 02e075b28..1fa47f42d 100644 --- a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -2,6 +2,7 @@ #include +#include "objc_helper.hpp" #include "pica_to_mtl.hpp" using namespace PICA; diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 47ea53146..7178785e3 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -2,6 +2,7 @@ #include +#include "objc_helper.hpp" #include "pica_to_mtl.hpp" using namespace PICA; diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 51cb4c4b3..93103091a 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -11,7 +11,6 @@ #include "opengl.hpp" #include "renderer_mtl/pica_to_mtl.hpp" - template using Interval = boost::icl::right_open_interval; diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index d53af2836..b392389c7 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -1,13 +1,17 @@ #pragma once +#include + +#include "helpers.hpp" #include "pica_to_mtl.hpp" + using namespace PICA; namespace Metal { struct BufferHandle { MTL::Buffer* buffer; - size_t offset; + usize offset; }; class VertexBufferCache { @@ -35,7 +39,7 @@ namespace Metal { additionalAllocations.clear(); } - BufferHandle get(const void* data, size_t size) { + BufferHandle get(const void* data, usize size) { // If the vertex buffer is too large, just create a new one if (ptr + size > CACHE_BUFFER_SIZE) { MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); @@ -47,9 +51,9 @@ namespace Metal { } // Copy the data into the buffer - memcpy((char*)buffer->contents() + ptr, data, size); + std::memcpy((char*)buffer->contents() + ptr, data, size); - size_t oldPtr = ptr; + auto oldPtr = ptr; ptr += size; return BufferHandle{buffer, oldPtr}; @@ -57,6 +61,7 @@ namespace Metal { void reset() { endFrame(); + if (buffer) { buffer->release(); create(); @@ -65,7 +70,7 @@ namespace Metal { private: MTL::Buffer* buffer = nullptr; - size_t ptr = 0; + usize ptr = 0; std::vector additionalAllocations; MTL::Device* device; diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index 9234c7485..715088b45 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -4,7 +4,6 @@ #include "PICA/regs.hpp" - namespace PICA { struct PixelFormatInfo { MTL::PixelFormat pixelFormat; @@ -61,7 +60,7 @@ namespace PICA { case 5: return MTL::CompareFunctionLessEqual; case 6: return MTL::CompareFunctionGreater; case 7: return MTL::CompareFunctionGreaterEqual; - default: panic("Unknown compare function %u", func); + default: Helpers::panic("Unknown compare function %u", func); } return MTL::CompareFunctionAlways; @@ -77,7 +76,7 @@ namespace PICA { case 5: return MTL::BlendOperationAdd; // Unused (same as 0) case 6: return MTL::BlendOperationAdd; // Unused (same as 0) case 7: return MTL::BlendOperationAdd; // Unused (same as 0) - default: panic("Unknown blend operation %u", op); + default: Helpers::panic("Unknown blend operation %u", op); } return MTL::BlendOperationAdd; @@ -101,7 +100,7 @@ namespace PICA { case 13: return MTL::BlendFactorOneMinusBlendAlpha; case 14: return MTL::BlendFactorSourceAlphaSaturated; case 15: return MTL::BlendFactorOne; // Undocumented - default: panic("Unknown blend factor %u", factor); + default: Helpers::panic("Unknown blend factor %u", factor); } return MTL::BlendFactorOne; @@ -117,7 +116,7 @@ namespace PICA { case 5: return MTL::StencilOperationInvert; case 6: return MTL::StencilOperationIncrementWrap; case 7: return MTL::StencilOperationDecrementWrap; - default: panic("Unknown stencil operation %u", op); + default: Helpers::panic("Unknown stencil operation %u", op); } return MTL::StencilOperationKeep; @@ -131,7 +130,6 @@ namespace PICA { Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); return MTL::PrimitiveTypeTriangle; case PrimType::GeometryPrimitive: - // Helpers::warn("Geometry primitives are not yet, using triangles instead"); return MTL::PrimitiveTypeTriangle; } } @@ -146,7 +144,7 @@ namespace PICA { case 5: return MTL::SamplerAddressModeClampToBorderColor; case 6: return MTL::SamplerAddressModeRepeat; case 7: return MTL::SamplerAddressModeRepeat; - default: panic("Unknown sampler address mode %u", addrMode); + default: Helpers::panic("Unknown sampler address mode %u", addrMode); } return MTL::SamplerAddressModeClampToEdge; From e47923704e4c069ac2e3f882c69f915e3abb8a7b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 14:47:58 +0200 Subject: [PATCH 14/14] Relieve @SamoZ256 of dark memories --- src/core/renderer_mtl/renderer_mtl.cpp | 49 ++------------------------ 1 file changed, 2 insertions(+), 47 deletions(-) diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index a0c1888a6..df1f8b474 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -114,9 +114,7 @@ void RendererMTL::display() { endRenderPass(); commandBuffer->presentDrawable(drawable); - commandBuffer->popDebugGroup(); - commitCommandBuffer(); // Inform the vertex buffer cache that the frame ended @@ -623,12 +621,10 @@ std::optional RendererMTL::getColorRenderTarget( // Otherwise create and cache a new buffer. Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); - auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); // Clear the color buffer colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; - return colorBuffer; } @@ -722,7 +718,8 @@ void RendererMTL::bindTexturesToSlots() { commandEncoder.setFragmentTexture(tex.texture, i); commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { - // TODO: log + // TODO: Bind a blank texture here. Some games, like Pokemon X, will render with a texture bound to nullptr, triggering GPU open bus + // Binding a blank texture makes all of those games look normal } } } @@ -741,32 +738,6 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { lutLightingTexture->getTexture()->replaceRegion( MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 ); - - /* - endRenderPass(); - - Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); - - auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); - blitCommandEncoder->copyFromBuffer(buffer.buffer, buffer.offset, LIGHT_LUT_TEXTURE_WIDTH * 2 * 4, 0, MTL::Size(LIGHT_LUT_TEXTURE_WIDTH, - Lights::LUT_Count, 1), lutLightingTexture, 0, 0, MTL::Origin(0, 0, 0)); - - blitCommandEncoder->endEncoding(); - */ - - /* - renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); - Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); - renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); - u32 arrayOffset = 0; - renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize); - - MTL::Resource* barrierResources[] = {lutLightingTexture}; - renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); - */ } void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { @@ -787,22 +758,6 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { u32 index = lutFogTexture->getNextIndex(); lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); - - /* - renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); - // Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); - // renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); - renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); - u32 arrayOffset = (u32)Lights::LUT_Count; - renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(128)); - - MTL::Resource* barrierResources[] = {lutLightingTexture}; - renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); - */ } void RendererMTL::textureCopyImpl(