Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPU2: Optimize reverb resampling #10134

Merged
merged 5 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pcsx2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ set(pcsx2SPU2Sources
SPU2/Wavedump_wav.cpp
)

# SPU2 sources kept out of the shared pcsx2SPU2Sources list.
# They are added to each per-ISA GS-${isa} static library (sse4/avx/avx2),
# or otherwise appended to pcsx2SPU2Sources.
set(pcsx2SPU2SourcesUnshared
SPU2/ReverbResample.cpp
)

# SPU2 headers
set(pcsx2SPU2Headers
SPU2/Debug.h
Expand Down Expand Up @@ -762,7 +766,7 @@ if(DISABLE_ADVANCE_SIMD)
# Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE).
set(is_first_isa "1")
foreach(isa "sse4" "avx" "avx2")
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared})
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared} ${pcsx2SPU2SourcesUnshared})
target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS)
target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}})
target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}})
Expand All @@ -778,6 +782,7 @@ if(DISABLE_ADVANCE_SIMD)
else()
list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared})
list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared})
list(APPEND pcsx2SPU2Sources ${pcsx2SPU2SourcesUnshared})
endif()

# DebugTools sources
Expand Down
5 changes: 5 additions & 0 deletions pcsx2/GS/GSVector4i.h
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,11 @@ class alignas(16) GSVector4i
return GSVector4i(_mm_adds_epi16(m, v.m));
}

// Horizontal saturating add of signed 16-bit elements.
// Forwards this vector and v to _mm_hadds_epi16, which sums each adjacent
// pair of 16-bit elements with signed saturation; the sums taken from this
// vector fill the low half of the result and the sums from v the high half.
__forceinline GSVector4i hadds16(const GSVector4i& v) const
{
return GSVector4i(_mm_hadds_epi16(m, v.m));
}

__forceinline GSVector4i addus8(const GSVector4i& v) const
{
return GSVector4i(_mm_adds_epu8(m, v.m));
Expand Down
5 changes: 5 additions & 0 deletions pcsx2/GS/GSVector8i.h
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,11 @@ class alignas(32) GSVector8i
return GSVector8i(_mm256_adds_epi16(m, v.m));
}

// Horizontal saturating add of signed 16-bit elements.
// Forwards this vector and v to _mm256_hadds_epi16, which sums each adjacent
// pair of 16-bit elements with signed saturation, working independently on
// each 128-bit lane of the two inputs.
__forceinline GSVector8i hadds16(const GSVector8i& v) const
{
return GSVector8i(_mm256_hadds_epi16(m, v.m));
}

__forceinline GSVector8i addus8(const GSVector8i& v) const
{
return GSVector8i(_mm256_adds_epu8(m, v.m));
Expand Down
10 changes: 0 additions & 10 deletions pcsx2/SPU2/Mixer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,6 @@ static const s32 tbl_XA_Factor[16][2] =
{98, -55},
{122, -60}};

// Saturates a sample value to the range [-0x8000, 0x7fff].
__forceinline s32 clamp_mix(s32 x)
{
	if (x < -0x8000)
		return -0x8000;

	if (x > 0x7fff)
		return 0x7fff;

	return x;
}

// Saturates both channels of a stereo sample using the scalar clamp_mix.
__forceinline StereoOut32 clamp_mix(StereoOut32 sample)
{
	const s32 left = clamp_mix(sample.Left);
	const s32 right = clamp_mix(sample.Right);

	return StereoOut32(left, right);
}

static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2)
{
const s32 header = *block;
Expand Down
2 changes: 0 additions & 2 deletions pcsx2/SPU2/Mixer.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,3 @@
#pragma once

extern void Mix();
extern s32 clamp_mix(s32 x);
extern StereoOut32 clamp_mix(StereoOut32 sample);
119 changes: 19 additions & 100 deletions pcsx2/SPU2/Reverb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@

#include "PrecompiledHeader.h"
#include "Global.h"
#include <array>
#include "GS/GSVector.h"

#include <array>

void V_Core::AnalyzeReverbPreset()
{
Expand Down Expand Up @@ -53,98 +54,6 @@ void V_Core::AnalyzeReverbPreset()
Console.WriteLn("----------------------------------------------------------");
}

// Number of taps in the reverb resampling filter.
static constexpr u32 NUM_TAPS = 39;

// Coefficients of the 39 tap filter. The table is symmetric around the
// center tap (index 19), and every odd-indexed tap other than the center
// is zero, so those entries could be optimized out.
static constexpr std::array<s32, NUM_TAPS> filter_coefs = {
	// Taps 0-18.
	-1, 0, 2, 0, -10, 0, 35, 0, -103, 0, 266, 0, -616, 0, 1332, 0, -2960, 0, 10246,
	// Tap 19 (center).
	16384,
	// Taps 20-38, mirroring taps 18-0.
	10246, 0, -2960, 0, 1332, 0, -616, 0, 266, 0, -103, 0, 35, 0, -10, 0, 2, 0, -1,
};

// Applies the filter to the most recent NUM_TAPS entries of the selected
// RevbDownBuf channel and returns the result saturated to 16 bits.
//   right: selects which channel row of RevbDownBuf is read.
s32 __forceinline V_Core::ReverbDownsample(bool right)
{
	// Position of the first sample covered by the filter window; every
	// access is wrapped to the 64-entry buffer with "& 63".
	const auto window_start = RevbSampleBufPos - NUM_TAPS;
	const auto& history = RevbDownBuf[right];

	s32 acc = 0;

	// Only the even-indexed taps are visited; the zero coefficients are skipped.
	for (u32 tap = 0; tap < NUM_TAPS; tap += 2)
		acc += history[(window_start + tap) & 63] * filter_coefs[tap];

	// The center tap has an odd index, so the loop above missed it.
	acc += history[(window_start + 19) & 63] * filter_coefs[19];

	// Scale the sum down by 15 bits and saturate to the signed 16-bit range.
	acc >>= 15;
	return std::clamp<s32>(acc, INT16_MIN, INT16_MAX);
}

// Filters the RevbUpBuf history of both channels and returns the resulting
// stereo sample, each channel saturated to 16 bits.
//   phase: when set, only the center coefficient is applied; otherwise the
//          even-indexed coefficients are applied across the window.
StereoOut32 __forceinline V_Core::ReverbUpsample(bool phase)
{
	// Window start position shifted right by one; every access is wrapped
	// to the 64-entry buffer with "& 63".
	const auto window_start = (RevbSampleBufPos - NUM_TAPS) >> 1;
	const auto& left_hist = RevbUpBuf[0];
	const auto& right_hist = RevbUpBuf[1];

	s32 left_acc = 0;
	s32 right_acc = 0;

	if (!phase)
	{
		// One stored sample per even-indexed coefficient.
		constexpr u32 tap_count = (NUM_TAPS >> 1) + 1;
		for (u32 n = 0; n < tap_count; n++)
		{
			const s32 coef = filter_coefs[n * 2];
			left_acc += left_hist[(window_start + n) & 63] * coef;
			right_acc += right_hist[(window_start + n) & 63] * coef;
		}
	}
	else
	{
		// Only the center coefficient contributes on this phase.
		left_acc += left_hist[(window_start + 9) & 63] * filter_coefs[19];
		right_acc += right_hist[(window_start + 9) & 63] * filter_coefs[19];
	}

	// Scale the sum down by 14 bits and saturate to the signed 16-bit range.
	const auto finish = [](s32 acc) {
		return std::clamp<s32>(acc >> 14, INT16_MIN, INT16_MAX);
	};

	left_acc = finish(left_acc);
	right_acc = finish(right_acc);

	return {left_acc, right_acc};
}

__forceinline s32 V_Core::RevbGetIndexer(s32 offset)
{
u32 start = EffectsStartA & 0x3f'ffff;
Expand All @@ -157,15 +66,19 @@ __forceinline s32 V_Core::RevbGetIndexer(s32 offset)
return x & 0xf'ffff;
}

StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
StereoOut32 V_Core::DoReverb(StereoOut32 Input)
{
if (EffectsStartA >= EffectsEndA)
{
return StereoOut32::Empty;
}

RevbDownBuf[0][RevbSampleBufPos & 63] = Input.Left;
RevbDownBuf[1][RevbSampleBufPos & 63] = Input.Right;
Input = clamp_mix(Input);

RevbDownBuf[0][RevbSampleBufPos] = Input.Left;
RevbDownBuf[1][RevbSampleBufPos] = Input.Right;
RevbDownBuf[0][RevbSampleBufPos | 64] = Input.Left;
RevbDownBuf[1][RevbSampleBufPos | 64] = Input.Right;

bool R = Cycles & 1;

Expand Down Expand Up @@ -224,7 +137,7 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
s32 in, same, diff, apf1, apf2, out;

#define MUL(x, y) ((x) * (y) >> 15)
in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(R));
in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(*this, R));

same = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv];
diff = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv];
Expand All @@ -245,9 +158,15 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
_spu2mem[apf2_dst] = clamp_mix(apf2);
}

RevbUpBuf[R][(RevbSampleBufPos >> 1) & 63] = clamp_mix(out);
out = clamp_mix(out);

RevbUpBuf[R][RevbSampleBufPos] = out;
RevbUpBuf[!R][RevbSampleBufPos] = 0;

RevbUpBuf[R][RevbSampleBufPos | 64] = out;
RevbUpBuf[!R][RevbSampleBufPos | 64] = 0;

RevbSampleBufPos++;
RevbSampleBufPos = (RevbSampleBufPos + 1) & 63;

return ReverbUpsample(RevbSampleBufPos & 1);
return ReverbUpsample(*this);
}
Loading