From 07cee43a2bb0878b178d850b96981c5f661aa5ac Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 3 Nov 2024 19:50:27 +0200 Subject: [PATCH 01/14] HLE DSP: Implement per-voice mixing stage --- include/audio/hle_core.hpp | 14 +++++++++++--- src/core/audio/hle_core.cpp | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index bd7172379..d05e9808e 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -47,14 +47,17 @@ namespace Audio { // Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque? using SampleBuffer = std::deque>; - using BufferQueue = std::priority_queue; + BufferQueue buffers; SampleFormat sampleFormat = SampleFormat::ADPCM; SourceType sourceType = SourceType::Stereo; - std::array gain0, gain1, gain2; + // There's one gain configuration for each of the 3 intermediate mixing stages + // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample + std::array, 3> gains; + u32 samplePosition; // Sample number into the current audio buffer float rateMultiplier; u16 syncCount; @@ -112,6 +115,10 @@ namespace Audio { template using QuadFrame = Frame; + // Internally the DSP uses four channels when mixing. + // Neatly, QuadFrame means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing + using IntermediateMix = QuadFrame; + private: using ChannelFormat = HLE::DspConfiguration::OutputFormat; // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages @@ -151,7 +158,8 @@ namespace Audio { using Source = Audio::DSPSource; using SampleBuffer = Source::SampleBuffer; - + using IntermediateMix = DSPMixer::IntermediateMix; + private: enum class DSPState : u32 { Off, diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 85eee97a5..9870adef1 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -228,6 +228,7 @@ namespace Audio { // The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them read.dspConfiguration.dirtyRaw = 0; read.dspConfiguration.dirtyRaw2 = 0; + std::array mixes{}; for (int i = 0; i < sourceCount; i++) { // Update source configuration from the read region of shared memory @@ -250,6 +251,24 @@ namespace Audio { status.samplePosition = source.samplePosition; source.isBufferIDDirty = false; + + // If the source is still enabled, mix its output into the intermediate mix buffers + if (source.enabled) { + for (int mix = 0; mix < mixes.size(); mix++) { + IntermediateMix& intermediateMix = mixes[mix]; + const std::array& gains = source.gains[mix]; + + // TODO: SIMD implementations + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // Mono samples are in the format: (l, r) + // When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one + intermediateMix[sampleIndex][0] += s32(source.currentSamples[sampleIndex][0] * gains[0]); + intermediateMix[sampleIndex][1] += s32(source.currentSamples[sampleIndex][1] * gains[1]); + intermediateMix[sampleIndex][2] += s32(source.currentSamples[sampleIndex][0] * gains[2]); + intermediateMix[sampleIndex][3] += s32(source.currentSamples[sampleIndex][1] * gains[3]); + } + } + } } performMix(read, write); @@ -374,6 +393,21 @@ namespace Audio { } } +#define CONFIG_GAIN(index) \ + if (config.gain##index##Dirty) { \ + auto& dest = source.gains[index]; \ + auto& source = config.gain[index]; \ + \ + dest[0] = float(source[0]); \ + dest[1] = float(source[1]); \ + dest[2] = float(source[2]); \ + dest[3] = float(source[3]); \ + } + CONFIG_GAIN(0); + CONFIG_GAIN(1); + CONFIG_GAIN(2); +#undef CONFIG_GAIN + config.dirtyRaw = 0; } From b299609a9b938ad29694d4346f2e6f5d1290bddb Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 6 Nov 2024 19:26:57 +0200 Subject: [PATCH 02/14] More HLE DSP work --- include/audio/hle_core.hpp | 85 +++++++++++++++++++------------------ src/core/audio/hle_core.cpp | 23 +++++++--- 2 files changed, 60 insertions(+), 48 deletions(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index d05e9808e..2fccd55da 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -16,6 +16,46 @@ namespace Audio { using SampleFormat = HLE::SourceConfiguration::Configuration::Format; using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo; + class DSPMixer { + public: + template + using Sample = std::array; + + template + using Frame = std::array, 160>; + + template + using MonoFrame = Frame; + + template + using StereoFrame = Frame; + + template + using QuadFrame = Frame; + + // Internally the DSP uses four channels when mixing. + // Neatly, QuadFrame means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing + using IntermediateMix = QuadFrame; + + private: + using ChannelFormat = HLE::DspConfiguration::OutputFormat; + // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages + // Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU + static constexpr usize mixerStageCount = 3; + + public: + ChannelFormat channelFormat = ChannelFormat::Stereo; + std::array volumes; + std::array enableAuxStages; + + void reset() { + channelFormat = ChannelFormat::Stereo; + + volumes.fill(0.0); + enableAuxStages.fill(false); + } + }; + struct DSPSource { // Audio buffer information // https://www.3dbrew.org/wiki/DSP_Memory_Region @@ -49,6 +89,7 @@ namespace Audio { using SampleBuffer = std::deque>; using BufferQueue = std::priority_queue; + DSPMixer::StereoFrame currentFrame; BufferQueue buffers; SampleFormat sampleFormat = SampleFormat::ADPCM; @@ -98,46 +139,6 @@ namespace Audio { DSPSource() { reset(); } }; - class DSPMixer { - public: - template - using Sample = std::array; - - template - using Frame = std::array, 160>; - - template - using MonoFrame = Frame; - - template - using StereoFrame = Frame; - - template - using QuadFrame = Frame; - - // Internally the DSP uses four channels when mixing. - // Neatly, QuadFrame means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing - using IntermediateMix = QuadFrame; - - private: - using ChannelFormat = HLE::DspConfiguration::OutputFormat; - // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages - // Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU - static constexpr usize mixerStageCount = 3; - - public: - ChannelFormat channelFormat = ChannelFormat::Stereo; - std::array volumes; - std::array enableAuxStages; - - void reset() { - channelFormat = ChannelFormat::Stereo; - - volumes.fill(0.0); - enableAuxStages.fill(false); - } - }; - class HLE_DSP : public DSPCore { // The audio frame types are public in case we want to use them for unit tests public: @@ -159,7 +160,7 @@ namespace Audio { using Source = Audio::DSPSource; using SampleBuffer = Source::SampleBuffer; using IntermediateMix = DSPMixer::IntermediateMix; - + private: enum class DSPState : u32 { Off, @@ -226,7 +227,7 @@ namespace Audio { void outputFrame(); // Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion); - + // Decode an entire buffer worth of audio void decodeBuffer(DSPSource& source); diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 9870adef1..4bc548dcc 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -262,10 +262,10 @@ namespace Audio { for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { // Mono samples are in the format: (l, r) // When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one - intermediateMix[sampleIndex][0] += s32(source.currentSamples[sampleIndex][0] * gains[0]); - intermediateMix[sampleIndex][1] += s32(source.currentSamples[sampleIndex][1] * gains[1]); - intermediateMix[sampleIndex][2] += s32(source.currentSamples[sampleIndex][0] * gains[2]); - intermediateMix[sampleIndex][3] += s32(source.currentSamples[sampleIndex][1] * gains[3]); + intermediateMix[sampleIndex][0] += s32(source.currentFrame[sampleIndex][0] * gains[0]); + intermediateMix[sampleIndex][1] += s32(source.currentFrame[sampleIndex][1] * gains[1]); + intermediateMix[sampleIndex][2] += s32(source.currentFrame[sampleIndex][0] * gains[2]); + intermediateMix[sampleIndex][3] += s32(source.currentFrame[sampleIndex][1] * gains[3]); } } } @@ -467,6 +467,9 @@ namespace Audio { } void HLE_DSP::generateFrame(DSPSource& source) { + // Zero out all output samples at first. TODO: Don't zero out the entire frame initially, rather only zero-out the "unwritten" samples when the frame is done being processed. + source.currentFrame = {}; + if (source.currentSamples.empty()) { // There's no audio left to play, turn the voice off if (source.buffers.empty()) { @@ -480,7 +483,7 @@ namespace Audio { decodeBuffer(source); } else { - uint maxSampleCount = uint(float(Audio::samplesInFrame) * source.rateMultiplier); + uint maxSampleCount = uint(float(Audio::samplesInFrame) * 1.0); uint outputCount = 0; while (outputCount < maxSampleCount) { @@ -494,8 +497,14 @@ namespace Audio { const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); - // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + // Copy samples to current frame buffer + // TODO: Implement linear/polyphase interpolation + std::copy( + source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount), source.currentFrame.begin() + outputCount + ); + // Remove samples from sample buffer source.currentSamples.erase(source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount)); + // Advance sample position source.samplePosition += sampleCount; outputCount += sampleCount; } @@ -718,5 +727,7 @@ namespace Audio { buffers = {}; currentSamples.clear(); + + gains.fill({}); } } // namespace Audio From 8cfffb8119f4e78715d929aedbe304df235daaef Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 7 Nov 2024 22:08:28 +0200 Subject: [PATCH 03/14] HLE DSP: Actually parse InterpolationMode config --- include/audio/hle_core.hpp | 2 ++ src/core/audio/hle_core.cpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 2fccd55da..29fd45426 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -88,12 +88,14 @@ namespace Audio { // Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque? using SampleBuffer = std::deque>; using BufferQueue = std::priority_queue; + using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode; DSPMixer::StereoFrame currentFrame; BufferQueue buffers; SampleFormat sampleFormat = SampleFormat::ADPCM; SourceType sourceType = SourceType::Stereo; + InterpolationMode interpolationMode = InterpolationMode::Linear; // There's one gain configuration for each of the 3 intermediate mixing stages // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 4bc548dcc..06e001f11 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -319,6 +319,10 @@ namespace Audio { source.sourceType = config.monoOrStereo; } + if (config.interpolationDirty) { + source.interpolationMode = config.interpolationMode; + } + if (config.rateMultiplierDirty) { source.rateMultiplier = (config.rateMultiplier > 0.f) ? config.rateMultiplier : 1.f; } @@ -499,6 +503,7 @@ namespace Audio { // Copy samples to current frame buffer // TODO: Implement linear/polyphase interpolation + std::copy( source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount), source.currentFrame.begin() + outputCount ); @@ -718,6 +723,7 @@ namespace Audio { // Initialize these to some sane defaults sampleFormat = SampleFormat::ADPCM; sourceType = SourceType::Stereo; + interpolationMode = InterpolationMode::Linear; samplePosition = 0; previousBufferID = 0; From 69e8e1c2c46f848c5b5a58c871c812184d61123f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 23:11:19 +0200 Subject: [PATCH 04/14] Add audio interpolation helpers --- CMakeLists.txt | 2 + include/audio/audio_interpolation.hpp | 58 ++++++++++++++++++++ include/audio/hle_core.hpp | 47 ++--------------- include/audio/hle_mixer.hpp | 50 ++++++++++++++++++ src/core/audio/audio_interpolation.cpp | 73 ++++++++++++++++++++++++++ src/core/audio/hle_core.cpp | 1 + 6 files changed, 188 insertions(+), 43 deletions(-) create mode 100644 include/audio/audio_interpolation.hpp create mode 100644 include/audio/hle_mixer.hpp create mode 100644 src/core/audio/audio_interpolation.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 74fafc04c..3193701d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -294,6 +294,7 @@ set(APPLET_SOURCE_FILES src/core/applets/applet.cpp src/core/applets/mii_selecto ) set(AUDIO_SOURCE_FILES src/core/audio/dsp_core.cpp src/core/audio/null_core.cpp src/core/audio/teakra_core.cpp src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp src/core/audio/aac_decoder.cpp + src/core/audio/audio_interpolation.cpp ) set(RENDERER_SW_SOURCE_FILES src/core/renderer_sw/renderer_sw.cpp) @@ -334,6 +335,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp + include/audio/audio_interpolation.hpp include/audio/hle_mixer.hpp ) cmrc_add_resource_library( diff --git a/include/audio/audio_interpolation.hpp b/include/audio/audio_interpolation.hpp new file mode 100644 index 000000000..8a87cbcd2 --- /dev/null +++ b/include/audio/audio_interpolation.hpp @@ -0,0 +1,58 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "audio/hle_mixer.hpp" +#include "helpers.hpp" + +namespace Audio::Interpolation { + // A variable length buffer of signed PCM16 stereo samples. + using StereoBuffer16 = std::deque>; + using StereoFrame16 = Audio::DSPMixer::StereoFrame; + + struct State { + // Two historical samples. + std::array xn1 = {}; //< x[n-1] + std::array xn2 = {}; //< x[n-2] + // Current fractional position. + u64 fposition = 0; + }; + + /** + * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay. + * @param state Interpolation state. + * @param input Input buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. + */ + void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); + + /** + * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay. + * @param state Interpolation state. + * @param input Input buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. + */ + void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); + + /** + * Polyphase interpolation. This is currently stubbed to just perform linear interpolation + * @param state Interpolation state. + * @param input Input buffer. + * @param rate Stretch factor. Must be a positive non-zero value. + * rate > 1.0 performs decimation and rate < 1.0 performs upsampling. + * @param output The resampled audio buffer. + * @param outputi The index of output to start writing to. + */ + void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi); +} // namespace Audio::Interpolation \ No newline at end of file diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 29fd45426..b3832d76b 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -8,54 +8,13 @@ #include "audio/aac.hpp" #include "audio/aac_decoder.hpp" +#include "audio/audio_interpolation.hpp" #include "audio/dsp_core.hpp" #include "audio/dsp_shared_mem.hpp" +#include "audio/hle_mixer.hpp" #include "memory.hpp" namespace Audio { - using SampleFormat = HLE::SourceConfiguration::Configuration::Format; - using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo; - - class DSPMixer { - public: - template - using Sample = std::array; - - template - using Frame = std::array, 160>; - - template - using MonoFrame = Frame; - - template - using StereoFrame = Frame; - - template - using QuadFrame = Frame; - - // Internally the DSP uses four channels when mixing. - // Neatly, QuadFrame means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing - using IntermediateMix = QuadFrame; - - private: - using ChannelFormat = HLE::DspConfiguration::OutputFormat; - // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages - // Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU - static constexpr usize mixerStageCount = 3; - - public: - ChannelFormat channelFormat = ChannelFormat::Stereo; - std::array volumes; - std::array enableAuxStages; - - void reset() { - channelFormat = ChannelFormat::Stereo; - - volumes.fill(0.0); - enableAuxStages.fill(false); - } - }; - struct DSPSource { // Audio buffer information // https://www.3dbrew.org/wiki/DSP_Memory_Region @@ -89,6 +48,7 @@ namespace Audio { using SampleBuffer = std::deque>; using BufferQueue = std::priority_queue; using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode; + using InterpolationState = Audio::Interpolation::State; DSPMixer::StereoFrame currentFrame; BufferQueue buffers; @@ -96,6 +56,7 @@ namespace Audio { SampleFormat sampleFormat = SampleFormat::ADPCM; SourceType sourceType = SourceType::Stereo; InterpolationMode interpolationMode = InterpolationMode::Linear; + InterpolationState interpolationState; // There's one gain configuration for each of the 3 intermediate mixing stages // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample diff --git a/include/audio/hle_mixer.hpp b/include/audio/hle_mixer.hpp new file mode 100644 index 000000000..ed8b4a098 --- /dev/null +++ b/include/audio/hle_mixer.hpp @@ -0,0 +1,50 @@ +#pragma once +#include + +#include "audio/dsp_shared_mem.hpp" +#include "helpers.hpp" + +namespace Audio { + using SampleFormat = HLE::SourceConfiguration::Configuration::Format; + using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo; + + class DSPMixer { + public: + template + using Sample = std::array; + + template + using Frame = std::array, 160>; + + template + using MonoFrame = Frame; + + template + using StereoFrame = Frame; + + template + using QuadFrame = Frame; + + // Internally the DSP uses four channels when mixing. + // Neatly, QuadFrame means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing + using IntermediateMix = QuadFrame; + + private: + using ChannelFormat = HLE::DspConfiguration::OutputFormat; + // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages + // Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU + static constexpr usize mixerStageCount = 3; + + public: + ChannelFormat channelFormat = ChannelFormat::Stereo; + std::array volumes; + std::array enableAuxStages; + + void reset() { + channelFormat = ChannelFormat::Stereo; + + volumes.fill(0.0); + enableAuxStages.fill(false); + } + }; +} // namespace Audio \ No newline at end of file diff --git a/src/core/audio/audio_interpolation.cpp b/src/core/audio/audio_interpolation.cpp new file mode 100644 index 000000000..d13c786ee --- /dev/null +++ b/src/core/audio/audio_interpolation.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "audio/audio_interpolation.hpp" + +#include + +#include "helpers.hpp" + +namespace Audio::Interpolation { + // Calculations are done in fixed point with 24 fractional bits. + // (This is not verified. This was chosen for minimal error.) + static constexpr u64 scaleFactor = 1 << 24; + static constexpr u64 scaleMask = scaleFactor - 1; + + /// Here we step over the input in steps of rate, until we consume all of the input. + /// Three adjacent samples are passed to fn each step. + template + static void stepOverSamples(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi, Function fn) { + if (input.empty()) { + return; + } + + input.insert(input.begin(), {state.xn2, state.xn1}); + + const u64 step_size = static_cast(rate * scaleFactor); + u64 fposition = state.fposition; + usize inputi = 0; + + while (outputi < output.size()) { + inputi = static_cast(fposition / scaleFactor); + + if (inputi + 2 >= input.size()) { + inputi = input.size() - 2; + break; + } + + u64 fraction = fposition & scaleMask; + output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]); + + fposition += step_size; + } + + state.xn2 = input[inputi]; + state.xn1 = input[inputi + 1]; + state.fposition = fposition - inputi * scaleFactor; + + input.erase(input.begin(), std::next(input.begin(), inputi + 2)); + } + + void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) { + stepOverSamples(state, input, rate, output, outputi, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { return x0; }); + } + + void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) { + // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware. + stepOverSamples(state, input, rate, output, outputi, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { + // This is a saturated subtraction. (Verified by black-box fuzzing.) + s64 delta0 = std::clamp(x1[0] - x0[0], -32768, 32767); + s64 delta1 = std::clamp(x1[1] - x0[1], -32768, 32767); + + return std::array{ + static_cast(x0[0] + fraction * delta0 / scaleFactor), + static_cast(x0[1] + fraction * delta1 / scaleFactor), + }; + }); + } + + void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi) { + linear(state, input, rate, output, outputi); + } +} // namespace Audio::Interpolation \ No newline at end of file diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 06e001f11..593dd4777 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -732,6 +732,7 @@ namespace Audio { rateMultiplier = 1.f; buffers = {}; + interpolationState = {}; currentSamples.clear(); gains.fill({}); From c70388dbeb1b1939caae67d6401d4caa61ea4bd0 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 23:18:41 +0200 Subject: [PATCH 05/14] HLE DSP: Actually interpolate audio --- src/core/audio/hle_core.cpp | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 593dd4777..96e51aaed 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -488,7 +488,7 @@ namespace Audio { decodeBuffer(source); } else { uint maxSampleCount = uint(float(Audio::samplesInFrame) * 1.0); - uint outputCount = 0; + usize outputCount = 0; while (outputCount < maxSampleCount) { if (source.currentSamples.empty()) { @@ -499,19 +499,27 @@ namespace Audio { } } - const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); + switch (source.interpolationMode) { + case Source::InterpolationMode::Linear: + Audio::Interpolation::linear( + source.interpolationState, source.currentSamples, source.rateMultiplier, source.currentFrame, outputCount + ); + break; + case Source::InterpolationMode::None: + Audio::Interpolation::none( + source.interpolationState, source.currentSamples, source.rateMultiplier, source.currentFrame, outputCount + ); + break; - // Copy samples to current frame buffer - // TODO: Implement linear/polyphase interpolation + case Source::InterpolationMode::Polyphase: + // Currently stubbed to be the same as linear + Audio::Interpolation::polyphase( + source.interpolationState, source.currentSamples, source.rateMultiplier, source.currentFrame, outputCount + ); + break; + } - std::copy( - source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount), source.currentFrame.begin() + outputCount - ); - // Remove samples from sample buffer - source.currentSamples.erase(source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount)); - // Advance sample position - source.samplePosition += sampleCount; - outputCount += sampleCount; + source.samplePosition += u32(outputCount); } } } From 6a793097226eb4ad0c2ad856c0cfe343b1adf7c2 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 23:52:29 +0200 Subject: [PATCH 06/14] HLE DSP: Fix up resampling a bit --- src/core/audio/hle_core.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 96e51aaed..77ed719f7 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -487,10 +487,10 @@ namespace Audio { decodeBuffer(source); } else { - uint maxSampleCount = uint(float(Audio::samplesInFrame) * 1.0); usize outputCount = 0; + static constexpr usize maxSamples = Audio::samplesInFrame; - while (outputCount < maxSampleCount) { + while (outputCount < maxSamples) { if (source.currentSamples.empty()) { if (source.buffers.empty()) { break; @@ -518,9 +518,9 @@ namespace Audio { ); break; } - - source.samplePosition += u32(outputCount); } + + source.samplePosition += u32(outputCount * source.rateMultiplier); } } From 7a4f3f48369009c9960c1d39d42a442e10b8e53d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 10 Nov 2024 14:53:07 +0200 Subject: [PATCH 07/14] HLE DSP: Add passthrough mix detection --- include/audio/hle_core.hpp | 4 ++++ src/core/audio/hle_core.cpp | 31 ++++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index b3832d76b..eed7bcc14 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -61,6 +61,10 @@ namespace Audio { // There's one gain configuration for each of the 3 intermediate mixing stages // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample std::array, 3> gains; + // Of the 3 intermediate mix stages, typically only the first one is actually enabled and the other ones do nothing + // Ie their gain is vec4(0.0). We track which stages are disabled (have a gain of all 0s) using this bitfield and skip them + // In order to save up on CPU time. + uint enabledMixStages = 0; u32 samplePosition; // Sample number into the current audio buffer float rateMultiplier; diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 77ed719f7..be754c81a 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -255,6 +255,11 @@ namespace Audio { // If the source is still enabled, mix its output into the intermediate mix buffers if (source.enabled) { for (int mix = 0; mix < mixes.size(); mix++) { + // Check if this stage is passthrough, and if it is, then skip it + if ((source.enabledMixStages & (1u << mix)) == 0) { + continue; + } + IntermediateMix& intermediateMix = mixes[mix]; const std::array& gains = source.gains[mix]; @@ -397,16 +402,23 @@ namespace Audio { } } -#define CONFIG_GAIN(index) \ - if (config.gain##index##Dirty) { \ - auto& dest = source.gains[index]; \ - auto& source = config.gain[index]; \ - \ - dest[0] = float(source[0]); \ - dest[1] = float(source[1]); \ - dest[2] = float(source[2]); \ - dest[3] = float(source[3]); \ +#define CONFIG_GAIN(index) \ + if (config.gain##index##Dirty) { \ + auto& dest = source.gains[index]; \ + auto& sourceGain = config.gain[index]; \ + \ + dest[0] = float(sourceGain[0]); \ + dest[1] = float(sourceGain[1]); \ + dest[2] = float(sourceGain[2]); \ + dest[3] = float(sourceGain[3]); \ + \ + if (dest[0] == 0.f && dest[1] == 0.f && dest[2] == 0.f && dest[3] == 0.f) { \ + source.enabledMixStages &= ~(1u << index); \ + } else { \ + source.enabledMixStages |= (1u << index); \ + } \ } + CONFIG_GAIN(0); CONFIG_GAIN(1); CONFIG_GAIN(2); @@ -744,5 +756,6 @@ namespace Audio { currentSamples.clear(); gains.fill({}); + enabledMixStages = 0; } } // namespace Audio From e22bc580600a9a2013ca64ba0d764f52fa5e5607 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 10 Nov 2024 14:55:31 +0200 Subject: [PATCH 08/14] HLE DSP: Format --- src/core/audio/hle_core.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index be754c81a..1e7b6bfb7 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -483,7 +483,8 @@ namespace Audio { } void HLE_DSP::generateFrame(DSPSource& source) { - // Zero out all output samples at first. TODO: Don't zero out the entire frame initially, rather only zero-out the "unwritten" samples when the frame is done being processed. + // Zero out all output samples at first. TODO: Don't zero out the entire frame initially, rather only zero-out the "unwritten" samples when + // the frame is done being processed. source.currentFrame = {}; if (source.currentSamples.empty()) { @@ -556,7 +557,7 @@ namespace Audio { if (config.outputFormatDirty) { mixer.channelFormat = config.outputFormat; } - + if (config.masterVolumeDirty) { mixer.volumes[0] = config.masterVolume; } @@ -564,7 +565,7 @@ namespace Audio { if (config.auxVolume0Dirty) { mixer.volumes[1] = config.auxVolumes[0]; } - + if (config.auxVolume1Dirty) { mixer.volumes[2] = config.auxVolumes[1]; } @@ -726,7 +727,7 @@ namespace Audio { response = request; response.resultCode = AAC::ResultCode::Success; break; - + default: Helpers::warn("Unknown AAC command type"); break; } @@ -752,7 +753,7 @@ namespace Audio { rateMultiplier = 1.f; buffers = {}; - interpolationState = {}; + interpolationState = {}; currentSamples.clear(); gains.fill({}); From 66be960150418f6758d4ea86f67eb037eece4f9f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 19 Nov 2024 01:27:42 +0200 Subject: [PATCH 09/14] HLE DSP: Stub audio output --- src/core/audio/hle_core.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 1e7b6bfb7..3ab05c93d 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -211,11 +211,11 @@ namespace Audio { if (audioEnabled) { // Wait until we've actually got room to push our frame - while (sampleBuffer.size() + 2 > sampleBuffer.Capacity()) { + while (sampleBuffer.size() + frame.size() * 2 > sampleBuffer.Capacity()) { std::this_thread::sleep_for(std::chrono::milliseconds{1}); } - sampleBuffer.push(frame.data(), frame.size()); + sampleBuffer.push(frame.data(), frame.size() * 2); } } @@ -276,6 +276,12 @@ namespace Audio { } } + for (int i = 0; i < Audio::samplesInFrame; i++) { + auto& mix0 = mixes[0]; + auto& sample = mix0[i]; + frame[i] = {s16(sample[0]), s16(sample[2])}; + } + performMix(read, write); } From 47ffd76faefc46f22add87bb93bce9f69f249399 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 19 Nov 2024 01:36:07 +0200 Subject: [PATCH 10/14] HLE DSP: Fix temporary quad -> stereo conversion --- src/core/audio/hle_core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 3ab05c93d..fec795f89 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -279,7 +279,7 @@ namespace Audio { for (int i = 0; i < Audio::samplesInFrame; i++) { auto& mix0 = mixes[0]; auto& sample = mix0[i]; - frame[i] = {s16(sample[0]), s16(sample[2])}; + frame[i] = {s16(sample[0]), s16(sample[1])}; } performMix(read, write); From efb6cdd30d462be35dcf98b8a8d2c3976ba772f3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 19 Nov 2024 02:12:20 +0200 Subject: [PATCH 11/14] Run clang-format --- include/audio/hle_core.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index eed7bcc14..5868a5d00 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -48,7 +48,7 @@ namespace Audio { using SampleBuffer = std::deque>; using BufferQueue = std::priority_queue; using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode; - using InterpolationState = Audio::Interpolation::State; + using InterpolationState = Audio::Interpolation::State; DSPMixer::StereoFrame currentFrame; BufferQueue buffers; @@ -56,7 +56,7 @@ namespace Audio { SampleFormat sampleFormat = SampleFormat::ADPCM; SourceType sourceType = SourceType::Stereo; InterpolationMode interpolationMode = InterpolationMode::Linear; - InterpolationState interpolationState; + InterpolationState interpolationState; // There's one gain configuration for each of the 3 intermediate mixing stages // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample @@ -221,5 +221,4 @@ namespace Audio { void setSemaphore(u16 value) override {} void setSemaphoreMask(u16 value) override {} }; - } // namespace Audio From 878ff419fd87752c7f51f8ce8ae76e893edee437 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:52:42 +0200 Subject: [PATCH 12/14] DSP: Add SSE quad-conversion code Co-Authored-By: Kelpsy <138107494+kelpsyberry@users.noreply.github.com> --- CMakeLists.txt | 2 +- include/audio/dsp_simd.hpp | 62 +++++++++++++++++++++++++++++++++++++ include/audio/hle_core.hpp | 7 +++-- src/core/audio/hle_core.cpp | 15 +++------ 4 files changed, 73 insertions(+), 13 deletions(-) create mode 100644 include/audio/dsp_simd.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f780387d2..c5e76d5a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,7 +355,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp - include/audio/audio_interpolation.hpp include/audio/hle_mixer.hpp + include/audio/audio_interpolation.hpp include/audio/hle_mixer.hpp include/audio/dsp_simd.hpp ) cmrc_add_resource_library( diff --git a/include/audio/dsp_simd.hpp b/include/audio/dsp_simd.hpp new file mode 100644 index 000000000..488234850 --- /dev/null +++ b/include/audio/dsp_simd.hpp @@ -0,0 +1,62 @@ +#pragma once + +#include "audio/hle_mixer.hpp" +#include "compiler_builtins.hpp" +#include "helpers.hpp" + +#if defined(_M_AMD64) || defined(__x86_64__) +#define DSP_SIMD_X64 +#include +#elif defined(_M_ARM64) || defined(__aarch64__) +#define DSP_SIMD_ARM64 +#include +#endif + +// Optimized SIMD functions for mixing the stereo output of a DSP voice into a quadraphonic intermediate mix +namespace DSP::MixIntoQuad { + using IntermediateMix = Audio::DSPMixer::IntermediateMix; + using StereoFrame16 = Audio::DSPMixer::StereoFrame; + + // Non-SIMD, portable algorithm + ALWAYS_INLINE static void mixPortable(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // Mono samples are in the format: (l, r) + // When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one + mix[sampleIndex][0] += s32(frame[sampleIndex][0] * gains[0]); + mix[sampleIndex][1] += s32(frame[sampleIndex][1] * gains[1]); + mix[sampleIndex][2] += s32(frame[sampleIndex][0] * gains[2]); + mix[sampleIndex][3] += s32(frame[sampleIndex][1] * gains[3]); + } + } + +#if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // The stereo samples, repeated every 4 bytes inside the vector register + __m128i stereoSamples = _mm_castps_si128(_mm_load1_ps((float*)&frame[sampleIndex][0])); + + __m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples)); + __m128 gains_ = _mm_load_ps(gains); + __m128i offset = _mm_cvtps_epi32(_mm_mul_ps(currentFrame, gains_)); + __m128i intermediateMixPrev = _mm_load_si128((__m128i*)&mix[sampleIndex][0]); + __m128i result = _mm_add_epi32(intermediateMixPrev, offset); + _mm_store_si128((__m128i*)&mix[sampleIndex][0], result); + } + } +#endif + +#ifdef DSP_SIMD_ARM64 + ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { mixPortable(mix, frame, gains); } +#endif + + // Mixes the stereo output of a DSP voice into a quadraphonic intermediate mix + static void mix(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { +#if defined(DSP_SIMD_ARM64) + return mixNEON(mix, frame, gains); +#elif defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + return mixSSE4_1(mix, frame, gains); +#else + return mixPortable(mix, frame, gains); +#endif + } +} // namespace DSP::MixIntoQuad \ No newline at end of file diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 5868a5d00..32bbaae8a 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -50,7 +50,9 @@ namespace Audio { using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode; using InterpolationState = Audio::Interpolation::State; - DSPMixer::StereoFrame currentFrame; + // The samples this voice output for this audio frame. + // Aligned to 4 for SIMD purposes. + alignas(4) DSPMixer::StereoFrame currentFrame; BufferQueue buffers; SampleFormat sampleFormat = SampleFormat::ADPCM; @@ -60,7 +62,8 @@ namespace Audio { // There's one gain configuration for each of the 3 intermediate mixing stages // And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample - std::array, 3> gains; + // Aligned to 16 for SIMD purposes + alignas(16) std::array, 3> gains; // Of the 3 intermediate mix stages, typically only the first one is actually enabled and the other ones do nothing // Ie their gain is vec4(0.0). We track which stages are disabled (have a gain of all 0s) using this bitfield and skip them // In order to save up on CPU time. diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index fec795f89..7e82a1398 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -7,6 +7,7 @@ #include #include "audio/aac_decoder.hpp" +#include "audio/dsp_simd.hpp" #include "services/dsp.hpp" namespace Audio { @@ -228,7 +229,9 @@ namespace Audio { // The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them read.dspConfiguration.dirtyRaw = 0; read.dspConfiguration.dirtyRaw2 = 0; - std::array mixes{}; + + // The intermediate mix buffer is aligned to 16 for SIMD purposes + alignas(16) std::array mixes{}; for (int i = 0; i < sourceCount; i++) { // Update source configuration from the read region of shared memory @@ -263,15 +266,7 @@ namespace Audio { IntermediateMix& intermediateMix = mixes[mix]; const std::array& gains = source.gains[mix]; - // TODO: SIMD implementations - for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { - // Mono samples are in the format: (l, r) - // When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one - intermediateMix[sampleIndex][0] += s32(source.currentFrame[sampleIndex][0] * gains[0]); - intermediateMix[sampleIndex][1] += s32(source.currentFrame[sampleIndex][1] * gains[1]); - intermediateMix[sampleIndex][2] += s32(source.currentFrame[sampleIndex][0] * gains[2]); - intermediateMix[sampleIndex][3] += s32(source.currentFrame[sampleIndex][1] * gains[3]); - } + DSP::MixIntoQuad::mix(intermediateMix, source.currentFrame, gains.data()); } } } From f30eed79801d790ad9186e075b4820243a3ec0b0 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:01:00 +0200 Subject: [PATCH 13/14] DSP: Add NEON quad-conversion code Co-Authored-By: Kelpsy <138107494+kelpsyberry@users.noreply.github.com> --- include/audio/dsp_simd.hpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/include/audio/dsp_simd.hpp b/include/audio/dsp_simd.hpp index 488234850..92cb486fc 100644 --- a/include/audio/dsp_simd.hpp +++ b/include/audio/dsp_simd.hpp @@ -31,13 +31,14 @@ namespace DSP::MixIntoQuad { #if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + __m128 gains_ = _mm_load_ps(gains); + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { // The stereo samples, repeated every 4 bytes inside the vector register __m128i stereoSamples = _mm_castps_si128(_mm_load1_ps((float*)&frame[sampleIndex][0])); __m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples)); - __m128 gains_ = _mm_load_ps(gains); - __m128i offset = _mm_cvtps_epi32(_mm_mul_ps(currentFrame, gains_)); + __m128i offset = _mm_cvttps_epi32(_mm_mul_ps(currentFrame, gains_)); __m128i intermediateMixPrev = _mm_load_si128((__m128i*)&mix[sampleIndex][0]); __m128i result = _mm_add_epi32(intermediateMixPrev, offset); _mm_store_si128((__m128i*)&mix[sampleIndex][0], result); @@ -46,7 +47,22 @@ namespace DSP::MixIntoQuad { #endif #ifdef DSP_SIMD_ARM64 - ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { mixPortable(mix, frame, gains); } + ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) { + float32x4_t gains_ = vld1q_f32(gains); + + for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) { + // Load l and r samples and repeat them every 4 bytes + int32x4_t stereoSamples = vld1q_dup_s32((s32*)&frame[sampleIndex][0]); + // Expand the bottom 4 s16 samples into an int32x4 with sign extension, then convert them to float32x4 + float32x4_t currentFrame = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_s32(stereoSamples)))); + + // Multiply samples by their respective gains, truncate the result, and add it into the intermediate mix buffer + int32x4_t offset = vcvtq_s32_f32(vmulq_f32(currentFrame, gains_)); + int32x4_t intermediateMixPrev = vld1q_s32((s32*)&mix[sampleIndex][0]); + int32x4_t result = vaddq_f32(intermediateMixPrev, offset); + vst1q_s32((s32*)&mix[sampleIndex][0], result); + } + } #endif // Mixes the stereo output of a DSP voice into a quadraphonic intermediate mix From b78450c88d4a4df15937c8f6584070257e7ec69c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:17:08 +0200 Subject: [PATCH 14/14] NEON mixer: Change vaddq_f32 to vaddq_s32 (Thank you Clang) Co-Authored-By: Kelpsy <138107494+kelpsyberry@users.noreply.github.com> --- include/audio/dsp_simd.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/audio/dsp_simd.hpp b/include/audio/dsp_simd.hpp index 92cb486fc..9a0e723a4 100644 --- a/include/audio/dsp_simd.hpp +++ b/include/audio/dsp_simd.hpp @@ -59,7 +59,7 @@ namespace DSP::MixIntoQuad { // Multiply samples by their respective gains, truncate the result, and add it into the intermediate mix buffer int32x4_t offset = vcvtq_s32_f32(vmulq_f32(currentFrame, gains_)); int32x4_t intermediateMixPrev = vld1q_s32((s32*)&mix[sampleIndex][0]); - int32x4_t result = vaddq_f32(intermediateMixPrev, offset); + int32x4_t result = vaddq_s32(intermediateMixPrev, offset); vst1q_s32((s32*)&mix[sampleIndex][0], result); } }