Merge pull request #273 from MinaKh/add_audio_transforms

Adding some audio transforms and augmentations to tonic
neuromorphs · May 15, 2024 · 5a20a54 · 5a20a54
2 parents a4e8a45 + 2fb1664
commit 5a20a54
Show file tree

Hide file tree

Showing 9 changed files with 930 additions and 8 deletions.
diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml
@@ -9,7 +9,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-2022]
-        python-version: ["3.7", "3.9", "3.11"]
+        python-version: ["3.8", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v3
       - if: matrix.os == 'ubuntu-latest'
@@ -21,8 +21,8 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install requirements
         run: |
-          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
           pip install -r test/requirements.txt
+          pip install -r test/torch_requirements.txt
           pip install .
       - name: Test with pytest
         run: pytest test
@@ -42,8 +42,8 @@ jobs:
           python-version: 3.9
       - name: Generate coverage report
         run: |
-          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
           pip install -r test/requirements.txt
+          pip install -r test/torch_requirements.txt
           pip install .
           coverage run -m pytest test
           coverage xml
@@ -63,8 +63,8 @@ jobs:
           python-version: 3.9
       - name: Install dependencies
         run: |
-          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
           pip install -r docs/requirements.txt
+          pip install -r test/torch_requirements.txt
           pip install .
       - name: Build documentation
         run: cd docs && make clean && make html # Use SPHINXOPTS="-W" to fail on warning.

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -4,8 +4,6 @@ sphinx-book-theme
 sphinx-gallery
 myst_nb
 pbr
-torchvision
 ipywidgets
 matplotlib
-torchdata
 sphinx-autoapi
diff --git a/docs/tutorials/audio_transforms_tutorial.ipynb b/docs/tutorials/audio_transforms_tutorial.ipynb
diff --git a/test/requirements.txt b/test/requirements.txt
@@ -1,8 +1,6 @@
 pytest
 coverage
-torch
 matplotlib
 hdf5plugin
 imageio
-torchdata
 aedat
diff --git a/test/test_audio_augmentations.py b/test/test_audio_augmentations.py
@@ -0,0 +1,142 @@
+import numpy as np
+import pytest
+
+
+def test_random_time_stretch():
+    """Check the output length of RandomTimeStretch for two stretch factors.
+
+    Synthetic random data is stretched with a single factor (0.5 to slow down,
+    1.5 to speed up), once with ``fix_length=False`` and once with
+    ``fix_length=True``. The expected length is derived from ``factors[0]`` of
+    the transform instance.
+    The comparison uses ``np.allclose`` with ``rtol=1e-2`` and ``atol=1e-3``.
+
+    - verifies that the returned object is not the input array itself
+    - verifies that the length of the signal has changed according to the stretch
+      factor (and that it stays fixed if the fix_length flag is True)
+    """
+    from tonic.audio_augmentations import RandomTimeStretch
+
+    # fixed seed keeps the synthetic input reproducible
+    np.random.seed(123)
+
+    sample_rate = 16_000
+    duration = 1  # sample length
+    # random array of shape (1, sample_rate * duration)
+    signal = np.random.rand(1, sample_rate * duration)
+
+    # exercise both the free-length and the fixed-length mode
+    for keep_length in (False, True):
+        # 0.5 slows the signal down, 1.5 speeds it up
+        for factor in (0.5, 1.5):
+            stretch = RandomTimeStretch(
+                samplerate=sample_rate,
+                sample_length=duration,
+                factors=[factor],
+                fix_length=keep_length,
+            )
+            stretched = stretch(signal)
+
+            assert stretched is not signal
+
+            if keep_length:
+                # with fix_length the number of samples must be untouched
+                assert stretched.shape[1] == signal.shape[1]
+                continue
+
+            # otherwise the length scales with 1 / factor, up to the tolerances
+            expected_length = signal.shape[1] / stretch.factors[0]
+            assert np.allclose(
+                stretched.shape[1],
+                expected_length,
+                rtol=1e-2,
+                atol=1e-3,
+            )
+
+
+def test_random_pitch_shift():
+    """Run RandomPitchShift on synthetic random data and check the result.
+
+    - verifies that the returned object is not the input array itself
+    - verifies that the number of samples has not changed
+    """
+    from tonic.audio_augmentations import RandomPitchShift
+
+    np.random.seed(123)
+
+    sample_rate = 16_000
+    duration = 1  # sample length
+    n_samples = sample_rate * duration
+    signal = np.random.rand(1, n_samples)
+
+    shifted = RandomPitchShift(samplerate=sample_rate)(signal)
+
+    assert shifted is not signal
+    # the second axis is compared against the input's own shape
+    assert shifted.shape[1] == signal.shape[1]
+
+
+def test_random_amplitude_scale():
+    """Run RandomAmplitudeScale on a constant signal of ones and check the result.
+
+    - verifies that the returned object is not the input array itself
+    - verifies that the number of samples has not changed
+    - verifies that the maximum of the output does not exceed max_amp
+    """
+    from tonic.audio_augmentations import RandomAmplitudeScale
+
+    np.random.seed(123)
+
+    sample_rate = 16_000
+    duration = 1  # sample length
+    signal = np.ones((1, sample_rate * duration))
+    amp_range = {"min_amp": 0.05, "max_amp": 0.15}
+
+    scale = RandomAmplitudeScale(samplerate=sample_rate, **amp_range)
+    scaled = scale(signal)
+
+    assert scaled is not signal
+    assert scaled.shape[1] == signal.shape[1]
+    assert scaled.max() <= amp_range["max_amp"]
+
+
+def test_add_white_noise():
+    """Run AddWhiteNoise on synthetic random data and check the result.
+
+    - verifies that the returned object is not the input array itself
+    - verifies that the number of samples has not changed
+    """
+    from tonic.audio_augmentations import AddWhiteNoise
+
+    np.random.seed(123)
+
+    sample_rate = 16_000
+    duration = 1  # sample length
+    clean = np.random.rand(1, sample_rate * duration)
+
+    add_noise = AddWhiteNoise(samplerate=sample_rate)
+    result = add_noise(clean)
+    assert result is not clean
+    assert result.shape[1] == clean.shape[1]
+
+
+def test_RIR():
+    """Run the RIR transform on synthetic float32 data and check the result.
+
+    - verifies that the returned object is not the input array itself
+    - verifies that the number of samples has not changed
+    """
+    from tonic.audio_augmentations import RIR
+
+    np.random.seed(123)
+
+    sample_rate = 16_000
+    duration = 1  # sample length
+    signal = np.random.rand(1, sample_rate * duration).astype("float32")
+    impulse_response_wav = (
+        "tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav"
+    )
+    rir = RIR(samplerate=sample_rate, rir_audio=impulse_response_wav)
+    augmented = rir(signal)
+    assert augmented is not signal
+    assert augmented.shape[1] == signal.shape[1]
diff --git a/test/test_audio_transforms.py b/test/test_audio_transforms.py
@@ -80,3 +80,60 @@ def __getitem__(self, item):
 
     signal = add_noise(data)
     assert signal.shape == (1, 16_000)
+
+
+def test_swap_axes():
+    """Check that SwapAxes(ax1=0, ax2=1) exchanges the two axis lengths."""
+    from tonic.audio_transforms import SwapAxes
+
+    np.random.seed(123)
+    sample_rate = 16_000
+    duration = 1  # sample length
+    signal = np.random.rand(1, sample_rate * duration)
+    n_first, n_second = signal.shape
+    swapped = SwapAxes(ax1=0, ax2=1)(signal)
+
+    # the two input axis lengths must appear in reversed order
+    assert swapped.shape[0] == n_second
+    assert swapped.shape[1] == n_first
+
+
+def test_amplitude_scale():
+    """Check that AmplitudeScale rescales the data maximum to max_amplitude."""
+    from tonic.audio_transforms import AmplitudeScale
+
+    np.random.seed(123)
+    sr = 16_000  # sample rate
+    sl = 1  # sample length
+    data = np.random.rand(1, sr * sl)
+    max_amps = np.random.rand(10)
+
+    for amp in max_amps:
+        transformed = AmplitudeScale(max_amplitude=amp)(data)
+        assert data.shape[1] == transformed.shape[1]
+        # rescaling is floating-point arithmetic, so do not demand bit-exact equality
+        assert np.isclose(transformed.max(), amp)
+
+
+def test_robust_amplitude_scale():
+    """Check that RobustAmplitudeScale bounds all non-outlier magnitudes."""
+    from tonic.audio_transforms import RobustAmplitudeScale
+
+    np.random.seed(123)
+    sr = 16_000  # sample rate
+    sl = 1  # sample length
+    data = np.random.rand(1, sr * sl)
+    max_amps = np.random.rand(10)
+    percent = 0.01
+    for amp in max_amps:
+        robust_amp_scale = RobustAmplitudeScale(
+            max_robust_amplitude=amp, outlier_percent=percent
+        )
+        transformed = robust_amp_scale(data)
+        sorted_transformed = np.sort(np.abs(transformed.ravel()))
+        # the largest `percent` fraction of magnitudes is treated as outliers
+        # and left out of the bound check
+        n_non_outlier = int(np.floor(len(sorted_transformed) * (1 - percent)))
+        non_outlier = sorted_transformed[:n_non_outlier]
+        assert data.shape[1] == transformed.shape[1]
+        assert np.all(non_outlier <= amp)
diff --git a/test/torch_requirements.txt b/test/torch_requirements.txt
@@ -0,0 +1,5 @@
+--index-url https://download.pytorch.org/whl/cpu
+torch==2.1.0
+torchaudio==2.1.0
+torchvision==0.16.0
+torchdata