From f5b7390e2e028a1f9f9355ba81a4d47ef26728b2 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 28 Nov 2024 21:00:26 +0200 Subject: [PATCH] x64 Shader JIT: Optimize SSE4.1 blending path --- src/core/PICA/dynapica/shader_rec_emitter_x64.cpp | 9 ++++----- src/libretro_core.cpp | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index 142ff8c85..ddec3a36b 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -370,12 +370,11 @@ void ShaderEmitter::storeRegister(Xmm source, const PICAShader& shader, u32 dest } else if (haveSSE4_1) { // Bit reverse the write mask because that is what blendps expects u32 adjustedMask = ((writeMask >> 3) & 0b1) | ((writeMask >> 1) & 0b10) | ((writeMask << 1) & 0b100) | ((writeMask << 3) & 0b1000); - // Don't accidentally overwrite scratch1 if that is what we're writing derp - Xmm temp = (source == scratch1) ? scratch2 : scratch1; - movaps(temp, xword[statePointer + offset]); // Read current value of dest - blendps(temp, source, adjustedMask); // Blend with source - movaps(xword[statePointer + offset], temp); // Write back + // Blend current value of dest with source. We have to invert the bits of the mask, as we do blendps source, dest instead of dest, source + // Note: This destroys source + blendps(source, xword[statePointer + offset], adjustedMask ^ 0xF); + movaps(xword[statePointer + offset], source); // Write back } else { // Blend algo referenced from Citra const u8 selector = (((writeMask & 0b1000) ? 1 : 0) << 0) | diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index f863c3dd4..1e5b532df 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -174,8 +174,8 @@ static void configInit() { {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, {"panda3ds_audio_volume", "Audio volume; 100|0|10|20|40|60|80|90|100|120|140|150|180|200"}, {"panda3ds_mute_audio", "Mute audio; disabled|enabled"}, - {"panda3ds_enable_aac", "Enable AAC audio; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"}, {"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"}, {"panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled"},