Skip to content

Commit

Permalink
SPU2: Multi-isa resampling functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Ziemas committed Oct 17, 2023
1 parent 76fd2c5 commit fa4501a
Show file tree
Hide file tree
Showing 6 changed files with 283 additions and 195 deletions.
7 changes: 6 additions & 1 deletion pcsx2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ set(pcsx2SPU2Sources
SPU2/Wavedump_wav.cpp
)

# SPU2 sources that are compiled into each per-ISA GS-${isa} library in the multi-ISA build,
# and otherwise appended to the regular pcsx2SPU2Sources list.
set(pcsx2SPU2SourcesUnshared
SPU2/ReverbResample.cpp
)

# SPU2 headers
set(pcsx2SPU2Headers
SPU2/Debug.h
Expand Down Expand Up @@ -762,7 +766,7 @@ if(DISABLE_ADVANCE_SIMD)
# Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE).
set(is_first_isa "1")
foreach(isa "sse4" "avx" "avx2")
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared})
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared} ${pcsx2SPU2SourcesUnshared})
target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS)
target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}})
target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}})
Expand All @@ -778,6 +782,7 @@ if(DISABLE_ADVANCE_SIMD)
else()
list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared})
list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared})
list(APPEND pcsx2SPU2Sources ${pcsx2SPU2SourcesUnshared})
endif()

# DebugTools sources
Expand Down
192 changes: 2 additions & 190 deletions pcsx2/SPU2/Reverb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

#include <array>


void V_Core::AnalyzeReverbPreset()
{
Console.WriteLn("Reverb Parameter Update for Core %d:", Index);
Expand Down Expand Up @@ -55,193 +54,6 @@ void V_Core::AnalyzeReverbPreset()
Console.WriteLn("----------------------------------------------------------");
}

// Number of taps in the reverb resampling FIR kernel.
static constexpr u32 NUM_TAPS = 39;

// Downsampling kernel: 39 taps, symmetric around the centre tap (16384).
// The zero taps could be optimized out.
// Only 39 of the 48 slots are listed; the remaining 9 are zero-initialised,
// so the table can be read in whole 8- or 16-element chunks.
static constexpr std::array<s16, 48> filter_down_coefs alignas(32) = {
	-1, 0, 2, 0, -10, 0, 35, 0,
	-103, 0, 266, 0, -616, 0, 1332, 0,
	-2960, 0, 10246, 16384, 10246, 0, -2960, 0,
	1332, 0, -616, 0, 266, 0, -103, 0,
	35, 0, -10, 0, 2, 0, -1,
};

static constexpr std::array<s16, 48> make_up_coefs()
{
std::array<s16, 48> ret = {};

for (u32 i = 0; i < NUM_TAPS; i++)
{
ret[i] = static_cast<s16>(std::clamp<s32>(filter_down_coefs[i] * 2, INT16_MIN, INT16_MAX));
}

return ret;
}

static constexpr std::array<s16, 48> filter_up_coefs alignas(32) = make_up_coefs();

// Runs the downsampling FIR over one row of RevbDownBuf and returns the result.
// `right` selects the row (RevbDownBuf[right]).
// NOTE(review): mul16hrs/adds16/hadds16 are presumably the rounded Q15 multiply and
// saturating (horizontal) 16-bit adds their names suggest; load<true>/load<false>
// presumably select aligned/unaligned loads - confirm against GSVector.
s32 __forceinline V_Core::ReverbDownsample(bool right)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	int base = (RevbSampleBufPos - NUM_TAPS) & 63;

#if _M_SSE >= 0x501
	using Vec = GSVector8i;
	constexpr int kStep = 16; // s16 elements consumed per load
#else
	using Vec = GSVector4i;
	constexpr int kStep = 8; // s16 elements consumed per load
#endif

	// The first chunk seeds the accumulator.
	auto coefs = Vec::load<true>(&filter_down_coefs[0]);
	auto samples = Vec::load<false>(&RevbDownBuf[right][base]);
	auto acc = samples.mul16hrs(coefs);

	// Remaining chunks, in ascending order, up to and including offset 32.
	for (int off = kStep; off <= 32; off += kStep)
	{
		coefs = Vec::load<true>(&filter_down_coefs[off]);
		samples = Vec::load<false>(&RevbDownBuf[right][base + off]);
		acc = acc.adds16(samples.mul16hrs(coefs));
	}

#if _M_SSE >= 0x501
	// NOTE(review): ba() presumably swaps the two halves of the wide vector so both get summed - confirm.
	acc = acc.adds16(acc.ba());
#endif

	// Three horizontal-add passes; the result is read from element 0.
	for (int pass = 0; pass < 3; ++pass)
		acc = acc.hadds16(acc);

	return acc.I16[0];
}

// Runs the upsampling FIR over both rows of RevbUpBuf (row 0 and row 1) with the
// same coefficients and returns the pair of results as a StereoOut32.
// NOTE(review): mul16hrs/adds16/hadds16 are presumably the rounded Q15 multiply and
// saturating (horizontal) 16-bit adds their names suggest; load<true>/load<false>
// presumably select aligned/unaligned loads - confirm against GSVector.
StereoOut32 __forceinline V_Core::ReverbUpsample()
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	int base = (RevbSampleBufPos - NUM_TAPS) & 63;

#if _M_SSE >= 0x501
	using Vec = GSVector8i;
	constexpr int kStep = 16; // s16 elements consumed per load
#else
	using Vec = GSVector4i;
	constexpr int kStep = 8; // s16 elements consumed per load
#endif

	// The first chunk seeds both accumulators.
	auto coefs = Vec::load<true>(&filter_up_coefs[0]);
	auto left = Vec::load<false>(&RevbUpBuf[0][base]);
	auto rightv = Vec::load<false>(&RevbUpBuf[1][base]);

	auto lacc = left.mul16hrs(coefs);
	auto racc = rightv.mul16hrs(coefs);

	// Remaining chunks, in ascending order, up to and including offset 32.
	for (int off = kStep; off <= 32; off += kStep)
	{
		coefs = Vec::load<true>(&filter_up_coefs[off]);
		left = Vec::load<false>(&RevbUpBuf[0][base + off]);
		rightv = Vec::load<false>(&RevbUpBuf[1][base + off]);
		lacc = lacc.adds16(left.mul16hrs(coefs));
		racc = racc.adds16(rightv.mul16hrs(coefs));
	}

#if _M_SSE >= 0x501
	// NOTE(review): ba() presumably swaps the two halves of the wide vector so both get summed - confirm.
	lacc = lacc.adds16(lacc.ba());
	racc = racc.adds16(racc.ba());
#endif

	// Three horizontal-add passes per channel; each result is read from element 0.
	for (int pass = 0; pass < 3; ++pass)
		lacc = lacc.hadds16(lacc);

	for (int pass = 0; pass < 3; ++pass)
		racc = racc.hadds16(racc);

	return {lacc.I16[0], racc.I16[0]};
}

__forceinline s32 V_Core::RevbGetIndexer(s32 offset)
{
u32 start = EffectsStartA & 0x3f'ffff;
Expand Down Expand Up @@ -323,7 +135,7 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
s32 in, same, diff, apf1, apf2, out;

#define MUL(x, y) ((x) * (y) >> 15)
in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(R));
in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(*this, R));

same = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv];
diff = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv];
Expand Down Expand Up @@ -352,5 +164,5 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)

RevbSampleBufPos = (RevbSampleBufPos + 1) & 63;

return ReverbUpsample();
return ReverbUpsample(*this);
}
Loading

0 comments on commit fa4501a

Please sign in to comment.