Merge pull request #2 from sboukortt/opensource

Open-source the rest of the codebase
google · Feb 5, 2024 · daf73f7 · daf73f7
2 parents 7da2bb4 + ace1ce3
commit daf73f7
Show file tree

Hide file tree

Showing 18 changed files with 2,573 additions and 8 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+# Abseil is vendored as a git submodule; the top-level CMakeLists.txt builds it
+# with add_subdirectory(third_party/absl).
+[submodule "third_party/absl"]
+	path = third_party/absl
+	url = https://github.com/abseil/abseil-cpp.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.10)
+
+project(Tabuli LANGUAGES CXX)
+
+# External dependencies. libsndfile and single-precision FFTW are located via
+# pkg-config and exposed as imported targets; Abseil is built from the
+# third_party/absl submodule.
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(SndFile REQUIRED IMPORTED_TARGET sndfile)
+pkg_check_modules(FFTW3 REQUIRED IMPORTED_TARGET fftw3f)
+find_package(Eigen3 REQUIRED)
+add_subdirectory(third_party/absl)
+
+add_executable(driver_model driver_model/driver_model.cc)
+target_link_libraries(driver_model PRIVATE PkgConfig::SndFile)
+
+# Every speaker experiment is a single-source executable that shares the same
+# baseline dependencies: libsndfile plus Abseil flags, logging and CHECK macros.
+foreach(experiment IN ITEMS
+    angular
+    emphasizer
+    revolve
+    spectrum_similarity
+    two_to_three
+    virtual_speakers)
+  add_executable(${experiment} speaker_experiments/${experiment}.cc)
+  target_link_libraries(${experiment}
+    PRIVATE
+      PkgConfig::SndFile
+      absl::flags
+      absl::flags_parse
+      absl::log
+      # Public CHECK target; it pulls in the internal check implementation
+      # transitively, so the internal target need not be named here.
+      absl::check)
+endforeach()
+
+# Experiments that additionally need FFTW (single precision).
+target_link_libraries(angular PRIVATE PkgConfig::FFTW3)
+target_link_libraries(spectrum_similarity PRIVATE PkgConfig::FFTW3)
+target_link_libraries(two_to_three PRIVATE PkgConfig::FFTW3)
+
+# virtual_speakers additionally needs Eigen.
+target_link_libraries(virtual_speakers PRIVATE Eigen3::Eigen)
diff --git a/README.md b/README.md
@@ -1,12 +1,47 @@
-### Overview
+Tabuli is a project for [Wave field synthesis](https://en.wikipedia.org/wiki/Wave_field_synthesis).
 
-Tabuli is a project for Wave field synthesis.
+This project is a study of upmixing 2-channel stereo input audio streams into
+multi-channel (e.g. 16-channel) output for wave field speakers.
 
-The project relates to Wave field synthesis hardware simulations,
-machine learning models, and other simulations.
+We produce the wave field synthesis audio using a multi-stage process. The
+stages are reverberation separation, dry sound spatial location,
+re-reverberation, rendering, and speaker reverse physics modeling. The
+different stages produce wav-files that are compatible with sox as an auxiliary
+processing tool that can be applied to the input, output or intermediate
+results of the stages.
 
-This is a highly experimental research project at this stage.
+The first step makes an attempt to separate the input stereo audio into three
+stereo audio streams that together sum back into the input stereo. The first of
+them attempts to capture the 'dry' audio of the recording, without reverberation
+and with minimal in-instrument resonances. The second stream will contain some
+early room reverberation and most of the in-instrument resonances.
+The third stream will contain the sound that has longer reverb within the room
+or concert hall, i.e., late reverb. The separation happens in a module called
+'emphasizer'.
 
-Top level folders roughly outline the area of research / applicability.
-Inside those sub-folders denote sub-projects and usually have `_v#` (version)
-suffix to make it easier to track approach changes / fundamental redesigns.
+In the dry sound spatial location, we use a process that reverses the predicted
+amplitudes of a spatialized source and tries to find an optimal single source
+for each frequency band to explain a microphone sensitivity pattern that we
+define. In the current practical application we use this process to upmix the
+audio from two tracks to twelve tracks, but we could use a larger or smaller
+number of tracks in upmixing. We call this model 'angular' as it relates to
+computing the angle of the sound source.
+
+In the re-reverberation we attempt to produce a multi-dimensional model of the
+reverberation based on the position of the dry sounds, while maintaining the
+volume of the reverberation within the two measured stages of reverberation --
+the early and late reverberation.
+
+In the rendering phase of the computation, a 3d-geometric virtual placement of
+sound sources (often a linear array) is rendered to actual speakers, such as a
+wave field speaker. Often it is practical to keep the virtual speakers a bit
+further away from the listener than the actual linear array, as that will allow
+the speakers to collaborate rather than trying to reproduce complex
+interferences.
+
+The last phase of computation, which we call 'driver model', applies the
+inverse physics of speaker drivers. This may allow us to reduce the impact of
+various non-linearities and oscillations within the speaker driver and
+contribute to the overall experience positively.
+
+Contact Jyrki Alakuijala ([email protected]) for more information about this
+project.
diff --git a/classd_simu/delta_sigma_quantizer.py b/classd_simu/delta_sigma_quantizer.py
@@ -0,0 +1,143 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Quantizer - dequantizer script."""
+# Quantizes the input signal with 1 bit using delta-sigma modulation and
+# dequantizes using a leaky integrator capacitor low pass filter.
+
+import numpy
+import scipy.io.wavfile
+import scipy.signal
+
+_OVERSAMPLE = 32
+
+
+def rmse(original, reproduction):
+  """Calculates RMSE (root mean square error) between original and reproduction.
+
+  Args:
+    original: Original signal (must be array-like).
+    reproduction: Reproduced signal after quntization and dequantization (must
+    be array-like).
+
+  Returns:
+    A scalar RMSE between original and reproduction.
+  """
+  return numpy.sqrt(
+      numpy.mean((numpy.asarray(original) - numpy.asarray(reproduction))**2))
+
+
+def quantize_delta_sigma(sig):
+  """Quantizes sig using 1 bit delta-sigma modulation of second order.
+
+  Args:
+    sig: Signal to be quantized
+
+  Returns:
+    The binary output of the 1 bit delta sigma modulator
+  """
+  # Initialize result array
+  result = numpy.zeros(len(sig) * _OVERSAMPLE)
+
+  # Make sure we have an array
+  sig = numpy.asarray(sig)
+
+  # Upsample the signal with linear interpolation
+
+  # Indices on which we will calculate interpolated values
+  x = numpy.arange(len(sig) * _OVERSAMPLE)
+  # Indices on which we have known values (multiples of _OVERSAMPLE)
+  xp = numpy.arange(len(sig)) * _OVERSAMPLE
+  signal_ovs = numpy.interp(x, xp, sig)
+
+  # Possible quantizer outputs.
+  d_minus_plus = (-1 << 15, 1<<15)
+
+  integrator = 0
+  integrator2 = 0
+
+  for i in range(len(signal_ovs)):
+    integrator += signal_ovs[i]
+    integrator2_prev = integrator2
+
+    # If integrator is over the threshold, decrease by D and add a pulse to
+    # the output
+    # Otherwise if it is not above the threshold add D and add a negative pulse.
+    delta = d_minus_plus[integrator2_prev > 0]
+    result[i] = delta
+    integrator -= delta
+    integrator2 -= delta
+
+    integrator2 += integrator
+
+  return result
+
+
+def dequantize_delta_sigma(signal):
+  """Dequantizes the binary output of the delta sigma modulator.
+
+  Uses a low pass filter made of three cascaded leaky integrators, then
+  averages the filtered stream over each block of _OVERSAMPLE samples to
+  produce one output sample per block.
+
+  Args:
+    signal: Signal to be dequantized (the oversampled modulator output).
+
+  Returns:
+    The dequantized signal: a numpy array of len(signal) // _OVERSAMPLE
+    samples. Input samples after the last complete block of _OVERSAMPLE do not
+    contribute to the output.
+  """
+  retval = numpy.zeros(len(signal) // _OVERSAMPLE)
+  integrator = 0
+  integrator2 = 0
+  integrator3 = 0
+  # All three stages use the same coefficient k; each stage computes
+  # state = state * (1 - k) + k * input.
+  k = 0.15
+  k2 = k
+  k3 = k
+  kc = 1.0 - k
+  k2c = 1.0 - k2
+  k3c = 1.0 - k3
+
+  # The following loop is implementing the filter of integrators using
+  # arithmetic operations and reassignments. A better solution could describe
+  # the integrators as LTI filters, and calculate their outputs by multiplying
+  # the fourier transform of the input signal and the frequency response of the
+  # integrator. IFFT of this product yields the desired output.
+  # We keep the implementation as is because we want it to correspond to what is
+  # going to happen in assembly.
+  period = 0  # Running sum of the filter output over the current block.
+  for i, v in enumerate(signal):
+    integrator = integrator * kc + k * v
+    integrator2 = integrator2 * k2c + k2 * integrator
+    integrator3 = integrator3 * k3c + k3 * integrator2
+
+    period += integrator3
+    # At the end of each block of _OVERSAMPLE inputs, store the block average
+    # as one output sample and start a new block.
+    if not (i+1) % _OVERSAMPLE:
+      retval[(i+1) // _OVERSAMPLE - 1] = period / _OVERSAMPLE
+      period = 0
+  return retval
+
+
+def demo():
+  """Demo that quantizes and dequantizes an audio signal.
+  """
+  fname = './../testdata/a.wav'
+  rate, data = scipy.io.wavfile.read(fname)
+  print('RMS value of signal: ', rmse(data, numpy.zeros(len(data))))
+
+  result_quant = quantize_delta_sigma(data)
+  result = dequantize_delta_sigma(result_quant)
+  min_length = min(len(data), len(result))
+  trimmed_data = data[:min_length]
+  trimmed_result = result[:min_length]
+
+  print('RMS error: ', rmse(trimmed_data, trimmed_result))
+  scipy.io.wavfile.write('result.wav', rate, result.astype(numpy.int16))