diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index b782c094bc6e74..f5d3f93ea5a099 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -280,6 +280,10 @@ set(pcsx2SPU2Sources SPU2/Wavedump_wav.cpp ) +set(pcsx2SPU2SourcesUnshared + SPU2/ReverbResample.cpp +) + # SPU2 headers set(pcsx2SPU2Headers SPU2/Debug.h @@ -762,7 +766,7 @@ if(DISABLE_ADVANCE_SIMD) # Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE). set(is_first_isa "1") foreach(isa "sse4" "avx" "avx2") - add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared}) + add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared} ${pcsx2SPU2SourcesUnshared}) target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS) target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}}) target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}}) @@ -778,6 +782,7 @@ if(DISABLE_ADVANCE_SIMD) else() list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared}) list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared}) + list(APPEND pcsx2SPU2Sources ${pcsx2SPU2SourcesUnshared}) endif() # DebugTools sources diff --git a/pcsx2/SPU2/Reverb.cpp b/pcsx2/SPU2/Reverb.cpp index d5a077369eefea..a04b786943cfc6 100644 --- a/pcsx2/SPU2/Reverb.cpp +++ b/pcsx2/SPU2/Reverb.cpp @@ -19,7 +19,6 @@ #include - void V_Core::AnalyzeReverbPreset() { Console.WriteLn("Reverb Parameter Update for Core %d:", Index); @@ -55,193 +54,6 @@ void V_Core::AnalyzeReverbPreset() Console.WriteLn("----------------------------------------------------------"); } -static constexpr u32 NUM_TAPS = 39; -// 39 tap filter, the 0's could be optimized out -static constexpr std::array filter_down_coefs alignas(32) = { - -1, - 0, - 2, - 0, - -10, - 0, - 35, - 0, - -103, - 0, - 266, - 0, - -616, - 0, - 1332, - 0, - -2960, - 0, - 10246, - 16384, - 10246, - 0, - -2960, - 0, - 1332, - 0, - -616, - 0, - 266, - 0, - -103, - 0, - 35, - 0, - -10, - 0, - 2, - 0, - -1, -}; - -static constexpr std::array make_up_coefs() -{ - std::array ret = {}; - - for (u32 i = 0; i < NUM_TAPS; i++) - { - ret[i] = static_cast(std::clamp(filter_down_coefs[i] * 2, INT16_MIN, INT16_MAX)); - } - - return ret; -} - -static constexpr std::array filter_up_coefs alignas(32) = make_up_coefs(); - -s32 __forceinline V_Core::ReverbDownsample(bool right) -{ - int index = (RevbSampleBufPos - NUM_TAPS) & 63; - -#if _M_SSE >= 0x501 - auto c = GSVector8i::load(&filter_down_coefs[0]); - auto s = GSVector8i::load(&RevbDownBuf[right][index]); - auto acc = s.mul16hrs(c); - - c = GSVector8i::load(&filter_down_coefs[16]); - s = GSVector8i::load(&RevbDownBuf[right][index + 16]); - acc = acc.adds16(s.mul16hrs(c)); - - c = GSVector8i::load(&filter_down_coefs[32]); - s = GSVector8i::load(&RevbDownBuf[right][index + 32]); - acc = acc.adds16(s.mul16hrs(c)); - - acc = acc.adds16(acc.ba()); - - acc = acc.hadds16(acc); - acc = acc.hadds16(acc); - acc = acc.hadds16(acc); -#else - auto c = GSVector4i::load(&filter_down_coefs[0]); - auto s = GSVector4i::load(&RevbDownBuf[right][index]); - auto acc = s.mul16hrs(c); - - c = GSVector4i::load(&filter_down_coefs[8]); - s = GSVector4i::load(&RevbDownBuf[right][index + 8]); - acc = acc.adds16(s.mul16hrs(c)); - - c = GSVector4i::load(&filter_down_coefs[16]); - s = GSVector4i::load(&RevbDownBuf[right][index + 16]); - acc = acc.adds16(s.mul16hrs(c)); - - c = GSVector4i::load(&filter_down_coefs[24]); - s = GSVector4i::load(&RevbDownBuf[right][index + 24]); - acc = acc.adds16(s.mul16hrs(c)); - - c = GSVector4i::load(&filter_down_coefs[32]); - s = GSVector4i::load(&RevbDownBuf[right][index + 32]); - acc = acc.adds16(s.mul16hrs(c)); - - acc = acc.hadds16(acc); - acc = acc.hadds16(acc); - acc = acc.hadds16(acc); -#endif - - return acc.I16[0]; -} - -StereoOut32 __forceinline V_Core::ReverbUpsample() -{ - int index = (RevbSampleBufPos - NUM_TAPS) & 63; - -#if _M_SSE >= 0x501 - auto c = GSVector8i::load(&filter_up_coefs[0]); - auto l = GSVector8i::load(&RevbUpBuf[0][index]); - auto r = GSVector8i::load(&RevbUpBuf[1][index]); - - auto lacc = l.mul16hrs(c); - auto racc = r.mul16hrs(c); - - c = GSVector8i::load(&filter_up_coefs[16]); - l = GSVector8i::load(&RevbUpBuf[0][index + 16]); - r = GSVector8i::load(&RevbUpBuf[1][index + 16]); - lacc = lacc.adds16(l.mul16hrs(c)); - racc = racc.adds16(r.mul16hrs(c)); - - c = GSVector8i::load(&filter_up_coefs[32]); - l = GSVector8i::load(&RevbUpBuf[0][index + 32]); - r = GSVector8i::load(&RevbUpBuf[1][index + 32]); - lacc = lacc.adds16(l.mul16hrs(c)); - racc = racc.adds16(r.mul16hrs(c)); - - lacc = lacc.adds16(lacc.ba()); - racc = racc.adds16(racc.ba()); - - lacc = lacc.hadds16(lacc); - lacc = lacc.hadds16(lacc); - lacc = lacc.hadds16(lacc); - - racc = racc.hadds16(racc); - racc = racc.hadds16(racc); - racc = racc.hadds16(racc); -#else - auto c = GSVector4i::load(&filter_up_coefs[0]); - auto l = GSVector4i::load(&RevbUpBuf[0][index]); - auto r = GSVector4i::load(&RevbUpBuf[1][index]); - - auto lacc = l.mul16hrs(c); - auto racc = r.mul16hrs(c); - - c = GSVector4i::load(&filter_up_coefs[8]); - l = GSVector4i::load(&RevbUpBuf[0][index + 8]); - r = GSVector4i::load(&RevbUpBuf[1][index + 8]); - lacc = lacc.adds16(l.mul16hrs(c)); - racc = racc.adds16(r.mul16hrs(c)); - - c = GSVector4i::load(&filter_up_coefs[16]); - l = GSVector4i::load(&RevbUpBuf[0][index + 16]); - r = GSVector4i::load(&RevbUpBuf[1][index + 16]); - lacc = lacc.adds16(l.mul16hrs(c)); - racc = racc.adds16(r.mul16hrs(c)); - - c = GSVector4i::load(&filter_up_coefs[24]); - l = GSVector4i::load(&RevbUpBuf[0][index + 24]); - r = GSVector4i::load(&RevbUpBuf[1][index + 24]); - lacc = lacc.adds16(l.mul16hrs(c)); - racc = racc.adds16(r.mul16hrs(c)); - - c = GSVector4i::load(&filter_up_coefs[32]); - l = GSVector4i::load(&RevbUpBuf[0][index + 32]); - r = GSVector4i::load(&RevbUpBuf[1][index + 32]); - lacc = lacc.adds16(l.mul16hrs(c)); - racc = racc.adds16(r.mul16hrs(c)); - - lacc = lacc.hadds16(lacc); - lacc = lacc.hadds16(lacc); - lacc = lacc.hadds16(lacc); - - racc = racc.hadds16(racc); - racc = racc.hadds16(racc); - racc = racc.hadds16(racc); -#endif - - return {lacc.I16[0], racc.I16[0]}; -} - __forceinline s32 V_Core::RevbGetIndexer(s32 offset) { u32 start = EffectsStartA & 0x3f'ffff; @@ -323,7 +135,7 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input) s32 in, same, diff, apf1, apf2, out; #define MUL(x, y) ((x) * (y) >> 15) - in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(R)); + in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(*this, R)); same = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv]; diff = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv]; @@ -352,5 +164,5 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input) RevbSampleBufPos = (RevbSampleBufPos + 1) & 63; - return ReverbUpsample(); + return ReverbUpsample(*this); } diff --git a/pcsx2/SPU2/ReverbResample.cpp b/pcsx2/SPU2/ReverbResample.cpp new file mode 100644 index 00000000000000..9618292a292171 --- /dev/null +++ b/pcsx2/SPU2/ReverbResample.cpp @@ -0,0 +1,257 @@ +#include "GS/GSVector.h" +#include "Global.h" + +MULTI_ISA_UNSHARED_START + +static constexpr u32 NUM_TAPS = 39; +// 39 tap filter, the 0's could be optimized out +static constexpr std::array filter_down_coefs alignas(32) = { + -1, + 0, + 2, + 0, + -10, + 0, + 35, + 0, + -103, + 0, + 266, + 0, + -616, + 0, + 1332, + 0, + -2960, + 0, + 10246, + 16384, + 10246, + 0, + -2960, + 0, + 1332, + 0, + -616, + 0, + 266, + 0, + -103, + 0, + 35, + 0, + -10, + 0, + 2, + 0, + -1, +}; + +static constexpr std::array make_up_coefs() +{ + std::array ret = {}; + + for (u32 i = 0; i < NUM_TAPS; i++) + { + ret[i] = static_cast(std::clamp(filter_down_coefs[i] * 2, INT16_MIN, INT16_MAX)); + } + + return ret; +} + +static constexpr std::array filter_up_coefs alignas(32) = make_up_coefs(); + +s32 __forceinline ReverbDownsample_reference(V_Core& core, bool right) +{ + int index = (core.RevbSampleBufPos - NUM_TAPS) & 63; + s32 out = 0; + + for (int i = 0; i < NUM_TAPS; i++) + { + out += core.RevbDownBuf[right][index + i] * filter_down_coefs[i]; + } + + out >>= 15; + + return clamp_mix(out); +} + +#if _M_SSE >= 0x501 +s32 __forceinline ReverbDownsample_avx(V_Core& core, bool right) +{ + int index = (core.RevbSampleBufPos - NUM_TAPS) & 63; + + auto c = GSVector8i::load(&filter_down_coefs[0]); + auto s = GSVector8i::load(&core.RevbDownBuf[right][index]); + auto acc = s.mul16hrs(c); + + c = GSVector8i::load(&filter_down_coefs[16]); + s = GSVector8i::load(&core.RevbDownBuf[right][index + 16]); + acc = acc.adds16(s.mul16hrs(c)); + + c = GSVector8i::load(&filter_down_coefs[32]); + s = GSVector8i::load(&core.RevbDownBuf[right][index + 32]); + acc = acc.adds16(s.mul16hrs(c)); + + acc = acc.adds16(acc.ba()); + + acc = acc.hadds16(acc); + acc = acc.hadds16(acc); + acc = acc.hadds16(acc); + + return acc.I16[0]; +} +#endif + +s32 __forceinline ReverbDownsample_sse(V_Core& core, bool right) +{ + int index = (core.RevbSampleBufPos - NUM_TAPS) & 63; + + auto c = GSVector4i::load(&filter_down_coefs[0]); + auto s = GSVector4i::load(&core.RevbDownBuf[right][index]); + auto acc = s.mul16hrs(c); + + c = GSVector4i::load(&filter_down_coefs[8]); + s = GSVector4i::load(&core.RevbDownBuf[right][index + 8]); + acc = acc.adds16(s.mul16hrs(c)); + + c = GSVector4i::load(&filter_down_coefs[16]); + s = GSVector4i::load(&core.RevbDownBuf[right][index + 16]); + acc = acc.adds16(s.mul16hrs(c)); + + c = GSVector4i::load(&filter_down_coefs[24]); + s = GSVector4i::load(&core.RevbDownBuf[right][index + 24]); + acc = acc.adds16(s.mul16hrs(c)); + + c = GSVector4i::load(&filter_down_coefs[32]); + s = GSVector4i::load(&core.RevbDownBuf[right][index + 32]); + acc = acc.adds16(s.mul16hrs(c)); + + acc = acc.hadds16(acc); + acc = acc.hadds16(acc); + acc = acc.hadds16(acc); + + return acc.I16[0]; +} + +s32 __forceinline ReverbDownsample(V_Core& core, bool right) +{ +#if _M_SSE >= 0x501 + return ReverbDownsample_avx(core, right); +#else + return ReverbDownsample_sse(core, right); +#endif +} + +StereoOut32 __forceinline ReverbUpsample_reference(V_Core& core) +{ + int index = (core.RevbSampleBufPos - NUM_TAPS) & 63; + s32 l = 0, r = 0; + + for (int i = 0; i < NUM_TAPS; i++) + { + l += core.RevbUpBuf[0][index + i] * filter_up_coefs[i]; + r += core.RevbUpBuf[1][index + i] * filter_up_coefs[i]; + } + + l >>= 15; + r >>= 15; + + return {clamp_mix(l), clamp_mix(r)}; +} + +#if _M_SSE >= 0x501 +StereoOut32 __forceinline ReverbUpsample_avx(V_Core& core) +{ + int index = (core.RevbSampleBufPos - NUM_TAPS) & 63; + + auto c = GSVector8i::load(&filter_up_coefs[0]); + auto l = GSVector8i::load(&core.RevbUpBuf[0][index]); + auto r = GSVector8i::load(&core.RevbUpBuf[1][index]); + + auto lacc = l.mul16hrs(c); + auto racc = r.mul16hrs(c); + + c = GSVector8i::load(&filter_up_coefs[16]); + l = GSVector8i::load(&core.RevbUpBuf[0][index + 16]); + r = GSVector8i::load(&core.RevbUpBuf[1][index + 16]); + lacc = lacc.adds16(l.mul16hrs(c)); + racc = racc.adds16(r.mul16hrs(c)); + + c = GSVector8i::load(&filter_up_coefs[32]); + l = GSVector8i::load(&core.RevbUpBuf[0][index + 32]); + r = GSVector8i::load(&core.RevbUpBuf[1][index + 32]); + lacc = lacc.adds16(l.mul16hrs(c)); + racc = racc.adds16(r.mul16hrs(c)); + + lacc = lacc.adds16(lacc.ba()); + racc = racc.adds16(racc.ba()); + + lacc = lacc.hadds16(lacc); + lacc = lacc.hadds16(lacc); + lacc = lacc.hadds16(lacc); + + racc = racc.hadds16(racc); + racc = racc.hadds16(racc); + racc = racc.hadds16(racc); + + return {lacc.I16[0], racc.I16[0]}; +} +#endif + +StereoOut32 __forceinline ReverbUpsample_sse(V_Core& core) +{ + int index = (core.RevbSampleBufPos - NUM_TAPS) & 63; + + auto c = GSVector4i::load(&filter_up_coefs[0]); + auto l = GSVector4i::load(&core.RevbUpBuf[0][index]); + auto r = GSVector4i::load(&core.RevbUpBuf[1][index]); + + auto lacc = l.mul16hrs(c); + auto racc = r.mul16hrs(c); + + c = GSVector4i::load(&filter_up_coefs[8]); + l = GSVector4i::load(&core.RevbUpBuf[0][index + 8]); + r = GSVector4i::load(&core.RevbUpBuf[1][index + 8]); + lacc = lacc.adds16(l.mul16hrs(c)); + racc = racc.adds16(r.mul16hrs(c)); + + c = GSVector4i::load(&filter_up_coefs[16]); + l = GSVector4i::load(&core.RevbUpBuf[0][index + 16]); + r = GSVector4i::load(&core.RevbUpBuf[1][index + 16]); + lacc = lacc.adds16(l.mul16hrs(c)); + racc = racc.adds16(r.mul16hrs(c)); + + c = GSVector4i::load(&filter_up_coefs[24]); + l = GSVector4i::load(&core.RevbUpBuf[0][index + 24]); + r = GSVector4i::load(&core.RevbUpBuf[1][index + 24]); + lacc = lacc.adds16(l.mul16hrs(c)); + racc = racc.adds16(r.mul16hrs(c)); + + c = GSVector4i::load(&filter_up_coefs[32]); + l = GSVector4i::load(&core.RevbUpBuf[0][index + 32]); + r = GSVector4i::load(&core.RevbUpBuf[1][index + 32]); + lacc = lacc.adds16(l.mul16hrs(c)); + racc = racc.adds16(r.mul16hrs(c)); + + lacc = lacc.hadds16(lacc); + lacc = lacc.hadds16(lacc); + lacc = lacc.hadds16(lacc); + + racc = racc.hadds16(racc); + racc = racc.hadds16(racc); + racc = racc.hadds16(racc); + + return {lacc.I16[0], racc.I16[0]}; +} + +StereoOut32 __forceinline ReverbUpsample(V_Core& core) +{ +#if _M_SSE >= 0x501 + return ReverbUpsample_avx(core); +#else + return ReverbUpsample_sse(core); +#endif +} + +MULTI_ISA_UNSHARED_END diff --git a/pcsx2/SPU2/defs.h b/pcsx2/SPU2/defs.h index 2c230fde98a51e..15c2ec74b8ef83 100644 --- a/pcsx2/SPU2/defs.h +++ b/pcsx2/SPU2/defs.h @@ -19,6 +19,8 @@ #include "SPU2/SndOut.h" #include "SPU2/Global.h" +#include "GS/MultiISA.h" + #include // -------------------------------------------------------------------------------------- @@ -494,9 +496,6 @@ struct V_Core StereoOut32 DoReverb(const StereoOut32& Input); s32 RevbGetIndexer(s32 offset); - s32 ReverbDownsample(bool right); - StereoOut32 ReverbUpsample(); - StereoOut32 ReadInput(); StereoOut32 ReadInput_HiFi(); @@ -545,6 +544,14 @@ struct V_Core void FinishDMAwrite(); }; +MULTI_ISA_DEF( + StereoOut32 ReverbUpsample(V_Core& core); + s32 ReverbDownsample(V_Core& core, bool right); +) + +extern StereoOut32 (*ReverbUpsample)(V_Core& core); +extern s32 (*ReverbDownsample)(V_Core& core, bool right); + extern V_Core Cores[2]; extern V_SPDIF Spdif; diff --git a/pcsx2/SPU2/spu2sys.cpp b/pcsx2/SPU2/spu2sys.cpp index 8cd2fc0a1ff184..875bebeb42b3a1 100644 --- a/pcsx2/SPU2/spu2sys.cpp +++ b/pcsx2/SPU2/spu2sys.cpp @@ -46,6 +46,9 @@ int PlayMode; static bool has_to_call_irq[2] = { false, false }; static bool has_to_call_irq_dma[2] = { false, false }; +StereoOut32 (*ReverbUpsample)(V_Core& core); +s32 (*ReverbDownsample)(V_Core& core, bool right); + static bool psxmode = false; @@ -111,6 +114,9 @@ void V_Core::Init(int index) if (SPU2::MsgToConsole()) SPU2::ConLog("* SPU2: Init SPU2 core %d \n", index); + ReverbDownsample = MULTI_ISA_SELECT(ReverbDownsample); + ReverbUpsample = MULTI_ISA_SELECT(ReverbUpsample); + //memset(this, 0, sizeof(V_Core)); // Explicitly initializing variables instead. Mute = false;