-
-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #632 from wheremyfoodat/more-dsp
WIP: Finishing DSP mixer
- Loading branch information
Showing
7 changed files
with
370 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
// Copyright 2016 Citra Emulator Project | ||
// Licensed under GPLv2 or any later version | ||
// Refer to the license.txt file included. | ||
|
||
#pragma once | ||
|
||
#include <array> | ||
#include <deque> | ||
|
||
#include "audio/hle_mixer.hpp" | ||
#include "helpers.hpp" | ||
|
||
namespace Audio::Interpolation { | ||
// A variable length buffer of signed PCM16 stereo samples. | ||
using StereoBuffer16 = std::deque<std::array<s16, 2>>; | ||
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>; | ||
|
||
// Resampler state that persists between successive interpolation calls.
struct State {
    // The two most recently consumed input samples (one s16 per stereo channel).
    // They are prepended to the next input buffer so interpolation can continue across calls.
    std::array<s16, 2> xn1{};  //< x[n-1]
    std::array<s16, 2> xn2{};  //< x[n-2]
    // Fractional read position carried over to the next call, stored as a fixed-point value.
    u64 fposition{0};
};
|
||
/** | ||
* No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay. | ||
* @param state Interpolation state. | ||
* @param input Input buffer. | ||
* @param rate Stretch factor. Must be a positive non-zero value. | ||
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling. | ||
* @param output The resampled audio buffer. | ||
* @param outputi The index of output to start writing to. It is advanced past every sample written. | ||
*/ | ||
void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); | ||
|
||
/** | ||
* Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay. | ||
* @param state Interpolation state. | ||
* @param input Input buffer. | ||
* @param rate Stretch factor. Must be a positive non-zero value. | ||
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling. | ||
* @param output The resampled audio buffer. | ||
* @param outputi The index of output to start writing to. It is advanced past every sample written. | ||
*/ | ||
void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); | ||
|
||
/** | ||
* Polyphase interpolation. This is currently stubbed to just perform linear interpolation. | ||
* @param state Interpolation state. | ||
* @param input Input buffer. | ||
* @param rate Stretch factor. Must be a positive non-zero value. | ||
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling. | ||
* @param output The resampled audio buffer. | ||
* @param outputi The index of output to start writing to. It is advanced past every sample written. | ||
*/ | ||
void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); | ||
} // namespace Audio::Interpolation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#pragma once | ||
|
||
#include "audio/hle_mixer.hpp" | ||
#include "compiler_builtins.hpp" | ||
#include "helpers.hpp" | ||
|
||
#if defined(_M_AMD64) || defined(__x86_64__) | ||
#define DSP_SIMD_X64 | ||
#include <immintrin.h> | ||
#elif defined(_M_ARM64) || defined(__aarch64__) | ||
#define DSP_SIMD_ARM64 | ||
#include <arm_neon.h> | ||
#endif | ||
|
||
// Optimized SIMD functions for mixing the stereo output of a DSP voice into a quadraphonic intermediate mix | ||
namespace DSP::MixIntoQuad { | ||
using IntermediateMix = Audio::DSPMixer::IntermediateMix; | ||
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>; | ||
|
||
// Scalar reference implementation that works on every target.
// For each sample of the frame, the (left, right) pair is scaled by four gains and accumulated
// into the four channels of the intermediate mix.
// `gains` must point to at least 4 floats.
ALWAYS_INLINE static void mixPortable(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
    for (usize i = 0; i < Audio::samplesInFrame; i++) {
        // Stereo samples are stored as (l, r)
        const s16 left = frame[i][0];
        const s16 right = frame[i][1];
        auto& quad = mix[i];

        // Even channels take the left sample, odd channels take the right one.
        // The float product is truncated toward zero when converted to s32.
        quad[0] += s32(left * gains[0]);
        quad[1] += s32(right * gains[1]);
        quad[2] += s32(left * gains[2]);
        quad[3] += s32(right * gains[3]);
    }
}
|
||
#if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) | ||
// SSE4.1 implementation: processes one stereo sample (-> 4 mix channels) per iteration.
// `gains` must point to at least 4 floats. Neither `gains` nor `mix` needs to be 16-byte aligned:
// the element types only guarantee 4-byte alignment, so every vector access uses the unaligned
// load/store forms (the aligned forms fault on a misaligned address).
ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
    const __m128 gains_ = _mm_loadu_ps(gains);

    for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
        // The stereo samples (two packed s16 = 4 bytes), repeated every 4 bytes inside the vector register
        const __m128i stereoSamples = _mm_castps_si128(_mm_load1_ps(reinterpret_cast<const float*>(&frame[sampleIndex][0])));

        // Sign-extend the low four s16 lanes (l, r, l, r) to s32 and convert them to float
        const __m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples));
        // Apply the per-channel gains and truncate toward zero, matching the s32() cast of the portable path
        const __m128i offset = _mm_cvttps_epi32(_mm_mul_ps(currentFrame, gains_));

        // Accumulate into the intermediate mix
        __m128i* const mixPtr = reinterpret_cast<__m128i*>(&mix[sampleIndex][0]);
        const __m128i intermediateMixPrev = _mm_loadu_si128(mixPtr);
        const __m128i result = _mm_add_epi32(intermediateMixPrev, offset);
        _mm_storeu_si128(mixPtr, result);
    }
}
#endif | ||
|
||
#ifdef DSP_SIMD_ARM64 | ||
// NEON implementation: processes one stereo sample (-> 4 mix channels) per iteration.
// `gains` must point to at least 4 floats.
ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
    const float32x4_t gainVector = vld1q_f32(gains);

    for (usize i = 0; i < Audio::samplesInFrame; i++) {
        s32* const mixLanes = (s32*)&mix[i][0];

        // Read the packed (l, r) pair as one 32-bit value and broadcast it to all four lanes
        const int32x4_t packedPair = vld1q_dup_s32((s32*)&frame[i][0]);
        // Viewed as s16 lanes, the low half holds l, r, l, r
        const int16x4_t lowHalf = vget_low_s16(vreinterpretq_s16_s32(packedPair));
        // Sign-extend to s32 and convert to float
        const float32x4_t samples = vcvtq_f32_s32(vmovl_s16(lowHalf));

        // Scale by the gains, convert back to integer (truncating) and accumulate into the intermediate mix
        const int32x4_t contribution = vcvtq_s32_f32(vmulq_f32(samples, gainVector));
        const int32x4_t accumulated = vaddq_s32(vld1q_s32(mixLanes), contribution);
        vst1q_s32(mixLanes, accumulated);
    }
}
#endif | ||
|
||
// Entry point: accumulates the stereo output of one DSP voice into a quadraphonic intermediate mix.
// The implementation is selected at compile time: NEON on ARM64, SSE4.1 on x64 when the compiler
// targets SSE4.1 or AVX, and the portable scalar loop otherwise.
// `gains` must point to at least 4 floats.
static void mix(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
#if defined(DSP_SIMD_ARM64)
    mixNEON(mix, frame, gains);
#elif defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
    mixSSE4_1(mix, frame, gains);
#else
    mixPortable(mix, frame, gains);
#endif
}
} // namespace DSP::MixIntoQuad |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#pragma once | ||
#include <array> | ||
|
||
#include "audio/dsp_shared_mem.hpp" | ||
#include "helpers.hpp" | ||
|
||
namespace Audio { | ||
using SampleFormat = HLE::SourceConfiguration::Configuration::Format; | ||
using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo; | ||
|
||
class DSPMixer { | ||
public: | ||
template <typename T, usize channelCount = 1> | ||
using Sample = std::array<T, channelCount>; | ||
|
||
template <typename T, usize channelCount> | ||
using Frame = std::array<Sample<T, channelCount>, 160>; | ||
|
||
template <typename T> | ||
using MonoFrame = Frame<T, 1>; | ||
|
||
template <typename T> | ||
using StereoFrame = Frame<T, 2>; | ||
|
||
template <typename T> | ||
using QuadFrame = Frame<T, 4>; | ||
|
||
// Internally the DSP uses four channels when mixing. | ||
// Neatly, QuadFrame<s32> means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing | ||
using IntermediateMix = QuadFrame<s32>; | ||
|
||
private: | ||
using ChannelFormat = HLE::DspConfiguration::OutputFormat; | ||
// The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages | ||
// Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU | ||
static constexpr usize mixerStageCount = 3; | ||
|
||
public: | ||
ChannelFormat channelFormat = ChannelFormat::Stereo; | ||
std::array<float, mixerStageCount> volumes; | ||
std::array<bool, 2> enableAuxStages; | ||
|
||
void reset() { | ||
channelFormat = ChannelFormat::Stereo; | ||
|
||
volumes.fill(0.0); | ||
enableAuxStages.fill(false); | ||
} | ||
}; | ||
} // namespace Audio |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// Copyright 2016 Citra Emulator Project | ||
// Licensed under GPLv2 or any later version | ||
// Refer to the license.txt file included. | ||
|
||
#include "audio/audio_interpolation.hpp" | ||
|
||
#include <algorithm> | ||
|
||
#include "helpers.hpp" | ||
|
||
namespace Audio::Interpolation { | ||
// Calculations are done in fixed point with 24 fractional bits. | ||
// (This is not verified. This was chosen for minimal error.) | ||
static constexpr u64 scaleFactor = 1 << 24; | ||
static constexpr u64 scaleMask = scaleFactor - 1; | ||
|
||
/// Here we step over the input in steps of rate, until we consume all of the input. | ||
/// Three adjacent samples are passed to fn each step. | ||
template <typename Function> | ||
static void stepOverSamples(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi, Function fn) { | ||
if (input.empty()) { | ||
return; | ||
} | ||
|
||
input.insert(input.begin(), {state.xn2, state.xn1}); | ||
|
||
const u64 step_size = static_cast<u64>(rate * scaleFactor); | ||
u64 fposition = state.fposition; | ||
usize inputi = 0; | ||
|
||
while (outputi < output.size()) { | ||
inputi = static_cast<usize>(fposition / scaleFactor); | ||
|
||
if (inputi + 2 >= input.size()) { | ||
inputi = input.size() - 2; | ||
break; | ||
} | ||
|
||
u64 fraction = fposition & scaleMask; | ||
output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]); | ||
|
||
fposition += step_size; | ||
} | ||
|
||
state.xn2 = input[inputi]; | ||
state.xn1 = input[inputi + 1]; | ||
state.fposition = fposition - inputi * scaleFactor; | ||
|
||
input.erase(input.begin(), std::next(input.begin(), inputi + 2)); | ||
} | ||
|
||
void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) {
    // Zero-order hold: the fractional position is ignored and the oldest of the three samples is emitted as-is
    const auto holdOldest = [](u64 /* fraction */, const auto& x0, const auto& /* x1 */, const auto& /* x2 */) { return x0; };
    stepOverSamples(state, input, rate, output, outputi, holdOldest);
}
|
||
void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) {
    // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.

    // Interpolates a single channel between two neighbouring samples.
    const auto lerpChannel = [](u64 fraction, s16 from, s16 to) -> s16 {
        // This is a saturated subtraction. (Verified by black-box fuzzing.)
        const s64 delta = std::clamp<s64>(to - from, -32768, 32767);

        // fraction and scaleFactor are unsigned, so the whole expression is evaluated in u64 and a
        // negative delta wraps around; the final narrowing to s16 keeps only the low 16 bits.
        // NOTE(review): the net effect appears to be from + floor(fraction * delta / scaleFactor),
        // i.e. rounding toward negative infinity. Do not "fix" this to signed arithmetic without
        // re-checking the output against the firmware.
        return static_cast<s16>(from + fraction * delta / scaleFactor);
    };

    // Only the first two of the three samples are used; each stereo channel is interpolated independently
    stepOverSamples(state, input, rate, output, outputi, [lerpChannel](u64 fraction, const auto& x0, const auto& x1, const auto& /* x2 */) {
        return std::array<s16, 2>{
            lerpChannel(fraction, x0[0], x1[0]),
            lerpChannel(fraction, x0[1], x1[1]),
        };
    });
}
|
||
void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) {
    // Stub: no polyphase filter is implemented here, so this forwards to linear interpolation
    linear(state, input, rate, output, outputi);
}
} // namespace Audio::Interpolation |
Oops, something went wrong.