PCSX2 · refractionpcsx2 · Oct 19, 2023 · Oct 17, 2023 · Oct 17, 2023 · Oct 17, 2023
diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
@@ -280,6 +280,10 @@ set(pcsx2SPU2Sources
 	SPU2/Wavedump_wav.cpp
 )
 
+set(pcsx2SPU2SourcesUnshared # SPU2 sources kept out of pcsx2SPU2Sources: added to every GS-${isa} library by the foreach below, or appended to pcsx2SPU2Sources in the else() branch
+	SPU2/ReverbResample.cpp
+)
+
 # SPU2 headers
 set(pcsx2SPU2Headers
 	SPU2/Debug.h
@@ -762,7 +766,7 @@ if(DISABLE_ADVANCE_SIMD)
 	# Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE).
 	set(is_first_isa "1")
 	foreach(isa "sse4" "avx" "avx2")
-		add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared})
+		add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared} ${pcsx2SPU2SourcesUnshared}) # one static library per ISA, now also compiling the SPU2 unshared sources
 		target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS)
 		target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}})
 		target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}})
@@ -778,6 +782,7 @@ if(DISABLE_ADVANCE_SIMD)
 else()
 	list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared})
 	list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared})
+	list(APPEND pcsx2SPU2Sources ${pcsx2SPU2SourcesUnshared}) # else() branch creates no GS-${isa} libraries, so the unshared SPU2 file joins the regular SPU2 source list
 endif()
 
 # DebugTools sources

diff --git a/pcsx2/GS/GSVector4i.h b/pcsx2/GS/GSVector4i.h
@@ -846,6 +846,11 @@ class alignas(16) GSVector4i
 		return GSVector4i(_mm_adds_epi16(m, v.m));
 	}
 
+	__forceinline GSVector4i hadds16(const GSVector4i& v) const // horizontal add of adjacent signed 16-bit pairs, with signed saturation
+	{
+		return GSVector4i(_mm_hadds_epi16(m, v.m)); // SSSE3 PHADDSW: result lanes 0-3 are the pair sums of *this, lanes 4-7 the pair sums of v
+	}
+
 	__forceinline GSVector4i addus8(const GSVector4i& v) const
 	{
 		return GSVector4i(_mm_adds_epu8(m, v.m));

diff --git a/pcsx2/GS/GSVector8i.h b/pcsx2/GS/GSVector8i.h
@@ -765,6 +765,11 @@ class alignas(32) GSVector8i
 		return GSVector8i(_mm256_adds_epi16(m, v.m));
 	}
 
+	__forceinline GSVector8i hadds16(const GSVector8i& v) const // horizontal add of adjacent signed 16-bit pairs, with signed saturation
+	{
+		return GSVector8i(_mm256_hadds_epi16(m, v.m)); // AVX2 VPHADDSW works per 128-bit lane: in each lane the low four results come from *this, the high four from v
+	}
+
 	__forceinline GSVector8i addus8(const GSVector8i& v) const
 	{
 		return GSVector8i(_mm256_adds_epu8(m, v.m));

diff --git a/pcsx2/SPU2/Mixer.cpp b/pcsx2/SPU2/Mixer.cpp
@@ -28,16 +28,6 @@ static const s32 tbl_XA_Factor[16][2] =
 		{98, -55},
 		{122, -60}};
 
-__forceinline s32 clamp_mix(s32 x)
-{
-	return std::clamp(x, -0x8000, 0x7fff);
-}
-
-__forceinline StereoOut32 clamp_mix(StereoOut32 sample)
-{
-	return StereoOut32(clamp_mix(sample.Left), clamp_mix(sample.Right));
-}
-
 static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2)
 {
 	const s32 header = *block;

diff --git a/pcsx2/SPU2/Mixer.h b/pcsx2/SPU2/Mixer.h
@@ -16,5 +16,3 @@
 #pragma once
 
 extern void Mix();
-extern s32 clamp_mix(s32 x);
-extern StereoOut32 clamp_mix(StereoOut32 sample);
diff --git a/pcsx2/SPU2/Reverb.cpp b/pcsx2/SPU2/Reverb.cpp
@@ -15,8 +15,9 @@
 
 #include "PrecompiledHeader.h"
 #include "Global.h"
-#include <array>
+#include "GS/GSVector.h"
 
+#include <array>
 
 void V_Core::AnalyzeReverbPreset()
 {
@@ -53,98 +54,6 @@ void V_Core::AnalyzeReverbPreset()
 	Console.WriteLn("----------------------------------------------------------");
 }
 
-static constexpr u32 NUM_TAPS = 39;
-// 39 tap filter, the 0's could be optimized out
-static constexpr std::array<s32, NUM_TAPS> filter_coefs = {
-	-1,
-	0,
-	2,
-	0,
-	-10,
-	0,
-	35,
-	0,
-	-103,
-	0,
-	266,
-	0,
-	-616,
-	0,
-	1332,
-	0,
-	-2960,
-	0,
-	10246,
-	16384,
-	10246,
-	0,
-	-2960,
-	0,
-	1332,
-	0,
-	-616,
-	0,
-	266,
-	0,
-	-103,
-	0,
-	35,
-	0,
-	-10,
-	0,
-	2,
-	0,
-	-1,
-};
-
-s32 __forceinline V_Core::ReverbDownsample(bool right)
-{
-	s32 out = 0;
-
-	// Skipping the 0 coefs.
-	for (u32 i = 0; i < NUM_TAPS; i += 2)
-	{
-		out += RevbDownBuf[right][((RevbSampleBufPos - NUM_TAPS) + i) & 63] * filter_coefs[i];
-	}
-
-	// We also skipped the middle so add that in.
-	out += RevbDownBuf[right][((RevbSampleBufPos - NUM_TAPS) + 19) & 63] * filter_coefs[19];
-
-	out >>= 15;
-	out = std::clamp<s32>(out, INT16_MIN, INT16_MAX);
-
-	return out;
-}
-
-StereoOut32 __forceinline V_Core::ReverbUpsample(bool phase)
-{
-	s32 ls = 0, rs = 0;
-
-	if (phase)
-	{
-		ls += RevbUpBuf[0][(((RevbSampleBufPos - NUM_TAPS) >> 1) + 9) & 63] * filter_coefs[19];
-		rs += RevbUpBuf[1][(((RevbSampleBufPos - NUM_TAPS) >> 1) + 9) & 63] * filter_coefs[19];
-	}
-	else
-	{
-		for (u32 i = 0; i < (NUM_TAPS >> 1) + 1; i++)
-		{
-			ls += RevbUpBuf[0][(((RevbSampleBufPos - NUM_TAPS) >> 1) + i) & 63] * filter_coefs[i * 2];
-		}
-		for (u32 i = 0; i < (NUM_TAPS >> 1) + 1; i++)
-		{
-			rs += RevbUpBuf[1][(((RevbSampleBufPos - NUM_TAPS) >> 1) + i) & 63] * filter_coefs[i * 2];
-		}
-	}
-
-	ls >>= 14;
-	ls = std::clamp<s32>(ls, INT16_MIN, INT16_MAX);
-	rs >>= 14;
-	rs = std::clamp<s32>(rs, INT16_MIN, INT16_MAX);
-
-	return {ls, rs};
-}
-
 __forceinline s32 V_Core::RevbGetIndexer(s32 offset)
 {
 	u32 start = EffectsStartA & 0x3f'ffff;
@@ -157,15 +66,19 @@ __forceinline s32 V_Core::RevbGetIndexer(s32 offset)
 	return x & 0xf'ffff;
 }
 
-StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
+StereoOut32 V_Core::DoReverb(StereoOut32 Input)
 {
 	if (EffectsStartA >= EffectsEndA)
 	{
 		return StereoOut32::Empty;
 	}
 
-	RevbDownBuf[0][RevbSampleBufPos & 63] = Input.Left;
-	RevbDownBuf[1][RevbSampleBufPos & 63] = Input.Right;
+	Input = clamp_mix(Input); // Input is now taken by value, so it can be clamped in place before buffering
+
+	RevbDownBuf[0][RevbSampleBufPos] = Input.Left;
+	RevbDownBuf[1][RevbSampleBufPos] = Input.Right;
+	RevbDownBuf[0][RevbSampleBufPos | 64] = Input.Left; // same sample stored again 64 entries further on; the position is masked to 0..63 where it is advanced at the end of this function
+	RevbDownBuf[1][RevbSampleBufPos | 64] = Input.Right; // NOTE(review): the mirror presumably lets ReverbDownsample read a contiguous window without wrapping - confirm in ReverbResample.cpp
 
 	bool R = Cycles & 1;
 
@@ -224,7 +137,7 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
 	s32 in, same, diff, apf1, apf2, out;
 
 #define MUL(x, y) ((x) * (y) >> 15)
-	in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(R));
+	in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(*this, R));
 
 	same = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv];
 	diff = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv];
@@ -245,9 +158,15 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
 		_spu2mem[apf2_dst] = clamp_mix(apf2);
 	}
 
-	RevbUpBuf[R][(RevbSampleBufPos >> 1) & 63] = clamp_mix(out);
+	out = clamp_mix(out); // clamp once; the clamped value is reused by both stores below
+
+	RevbUpBuf[R][RevbSampleBufPos] = out;
+	RevbUpBuf[!R][RevbSampleBufPos] = 0; // the channel not selected by R on this call gets a zero at this position
+
+	RevbUpBuf[R][RevbSampleBufPos | 64] = out; // mirrored copy 64 entries further on, same layout as the RevbDownBuf writes above
+	RevbUpBuf[!R][RevbSampleBufPos | 64] = 0;
 
-	RevbSampleBufPos++;
+	RevbSampleBufPos = (RevbSampleBufPos + 1) & 63; // wrap to 0..63 so the plain and "| 64" buffer indices never overlap
 
-	return ReverbUpsample(RevbSampleBufPos & 1);
+	return ReverbUpsample(*this);
 }