Skip to content

Commit

Permalink
Implemented NSIM similarity measurement.
Browse files Browse the repository at this point in the history
- Implemented NSIM (DOI:10.1016/j.specom.2011.09.004).
- Replaced the geometric norm calculation across time and frequency axes
  with '1 - NSIM' when computing Zimtohrli distance.
- Removed the redundant cpp/zimt/README.md.
- Refactored the dataset management tools to enable optimization without
  saving the intermediate results.
- Implemented simple simulated annealing optimization for some relevant
  Zimtohrli parameters.
- Redid the MOS mapping so that Zimtohrli MOS scores are still on the
  same scale as before the NSIM change.

Before:

|Score type |MSE  |Min score |Max score |Mean score |
|-----------|-----|----------|----------|-----------|
|ViSQOL     |0.09 |0.52      |0.80      |0.62       |
|PESQ       |0.10 |0.48      |0.84      |0.61       |
|Zimtohrli  |0.10 |0.56      |0.74      |0.59       |
|CDPAM      |0.12 |0.44      |0.73      |0.57       |
|PARLAQ     |0.15 |0.47      |0.78      |0.54       |
|PEAQB      |0.18 |0.28      |0.85      |0.51       |
|DPAM       |0.28 |0.19      |0.69      |0.42       |
|WARP-Q     |0.28 |0.07      |0.78      |0.45       |
|GVPMOS     |0.32 |0.01      |0.78      |0.40       |

After:

|Score type |MSE  |Min score |Max score |Mean score |
|-----------|-----|----------|----------|-----------|
|ViSQOL     |0.09 |0.52      |0.80      |0.62       |
|Zimtohrli  |0.09 |0.59      |0.72      |0.60       |
|PESQ       |0.10 |0.48      |0.84      |0.61       |
|CDPAM      |0.12 |0.44      |0.73      |0.57       |
|PARLAQ     |0.15 |0.47      |0.78      |0.54       |
|PEAQB      |0.18 |0.28      |0.85      |0.51       |
|DPAM       |0.28 |0.19      |0.69      |0.42       |
|WARP-Q     |0.28 |0.07      |0.78      |0.45       |
|GVPMOS     |0.32 |0.01      |0.78      |0.40       |
  • Loading branch information
Martin Bruse authored and zond committed May 17, 2024
1 parent 326a3bb commit e92ee29
Show file tree
Hide file tree
Showing 26 changed files with 1,083 additions and 456 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ add_library(zimtohrli_base STATIC
cpp/zimt/masking.h
cpp/zimt/mos.cc
cpp/zimt/mos.h
cpp/zimt/nsim.cc
cpp/zimt/nsim.h
cpp/zimt/zimtohrli.cc
cpp/zimt/zimtohrli.h
)
Expand Down
172 changes: 86 additions & 86 deletions COMPARISON.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions cmake/tests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ add_executable(zimtohrli_test
cpp/zimt/loudness_test.cc
cpp/zimt/masking_test.cc
cpp/zimt/mos_test.cc
cpp/zimt/nsim_test.cc
cpp/zimt/zimtohrli_test.cc
cpp/zimt/test_file_paths.cc
)
Expand Down Expand Up @@ -54,6 +55,7 @@ add_executable(zimtohrli_benchmark
cpp/zimt/filterbank_test.cc
cpp/zimt/loudness_test.cc
cpp/zimt/masking_test.cc
cpp/zimt/nsim_test.cc
cpp/zimt/zimtohrli_test.cc
)
target_link_libraries(zimtohrli_benchmark zimtohrli_base gtest gmock benchmark_main)
2 changes: 1 addition & 1 deletion configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ if [ "${1}" == "debug" ]; then
(cd debug_build && cmake -G Ninja -DCMAKE_C_FLAGS='-fPIC' -DCMAKE_CXX_FLAGS='-fPIC' -DCMAKE_BUILD_TYPE=RelWithDebInfo ..)
elif [ "${1}" == "asan" ]; then
mkdir -p asan_build
(cd asan_build && cmake -G Ninja -DCMAKE_C_FLAGS='-fPIC' -DZIMTOHRLI_ASAN=1 -DCMAKE_CXX_FLAGS='-fPIC' -DCMAKE_BUILD_TYPE=RelWithDebInfo ..)
(cd asan_build && cmake -G Ninja -DCMAKE_C_FLAGS='-fsanitize=address -fPIC' -DCMAKE_CXX_FLAGS='-fsanitize=address -fPIC' -DCMAKE_LINKER_FLAGS_DEBUG='-fsanitize=address' -DCMAKE_BUILD_TYPE=RelWithDebInfo ..)
else
mkdir -p build
(cd build && cmake -G Ninja -DCMAKE_C_FLAGS='-fPIC' -DCMAKE_CXX_FLAGS='-fPIC' -DCMAKE_BUILD_TYPE=Release ..)
Expand Down
54 changes: 0 additions & 54 deletions cpp/zimt/README.md

This file was deleted.

6 changes: 0 additions & 6 deletions cpp/zimt/compare.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@ ABSL_FLAG(bool, truncate, false,
ABSL_FLAG(float, unwarp_window, 2.0f,
"unwarp window length in seconds, must be greater than 0 if truncate "
"is false and the files are of different lengths");
ABSL_FLAG(float, freq_norm_order, zimtohrli::Zimtohrli{}.freq_norm_order,
"order of the per-step-norm across frequencies");
ABSL_FLAG(float, time_norm_order, zimtohrli::Zimtohrli{}.time_norm_order,
"order of the norm across all time steps");
ABSL_FLAG(bool, normalize_amplitude, true,
"whether to normalize the amplitude of all B sounds to the same max "
"amplitude as the A sound");
Expand Down Expand Up @@ -278,8 +274,6 @@ int Main(int argc, char* argv[]) {
.perceptual_sample_rate = absl::GetFlag(FLAGS_perceptual_sample_rate),
.cam_filterbank =
cam.CreateFilterbank(static_cast<float>(file_a->Info().samplerate)),
.time_norm_order = absl::GetFlag(FLAGS_time_norm_order),
.freq_norm_order = absl::GetFlag(FLAGS_freq_norm_order),
.full_scale_sine_db = absl::GetFlag(FLAGS_full_scale_sine_db),
};

Expand Down
26 changes: 18 additions & 8 deletions cpp/zimt/goohrli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ float DefaultPerceptualSampleRate() {
return zimtohrli::Zimtohrli{}.perceptual_sample_rate;
}

int DefaultNSIMStepWindow() { return zimtohrli::Zimtohrli{}.nsim_step_window; }

int DefaultNSIMChannelWindow() {
return zimtohrli::Zimtohrli{}.nsim_channel_window;
}

EnergyAndMaxAbsAmplitude Measure(const float* signal, int size) {
hwy::AlignedNDArray<float, 1> signal_array({static_cast<size_t>(size)});
hwy::CopyBytes(signal, signal_array.data(), size * sizeof(float));
Expand Down Expand Up @@ -99,20 +105,24 @@ float AnalysisDistance(Zimtohrli zimtohrli, Analysis a, Analysis b,
.value;
}

float GetTimeNormOrder(Zimtohrli zimtohrli) {
return static_cast<zimtohrli::Zimtohrli*>(zimtohrli)->time_norm_order;
// Reads `nsim_step_window` from the zimtohrli::Zimtohrli instance that the
// handle points at and returns it as an int.
int GetNSIMStepWindow(Zimtohrli zimtohrli) {
  const auto* const instance = static_cast<zimtohrli::Zimtohrli*>(zimtohrli);
  return static_cast<int>(instance->nsim_step_window);
}

void SetTimeNormOrder(Zimtohrli zimtohrli, float f) {
static_cast<zimtohrli::Zimtohrli*>(zimtohrli)->time_norm_order = f;
// Writes `s` into `nsim_step_window` of the zimtohrli::Zimtohrli instance that
// the handle points at.
// NOTE(review): `s` goes through static_cast<size_t>, which turns a negative
// int into a very large unsigned value - confirm callers only pass
// non-negative windows.
void SetNSIMStepWindow(Zimtohrli zimtohrli, int s) {
  auto* const instance = static_cast<zimtohrli::Zimtohrli*>(zimtohrli);
  instance->nsim_step_window = static_cast<size_t>(s);
}

float GetFreqNormOrder(Zimtohrli zimtohrli) {
return static_cast<zimtohrli::Zimtohrli*>(zimtohrli)->freq_norm_order;
// Reads `nsim_channel_window` from the zimtohrli::Zimtohrli instance that the
// handle points at and returns it as an int.
int GetNSIMChannelWindow(Zimtohrli zimtohrli) {
  const auto* const instance = static_cast<zimtohrli::Zimtohrli*>(zimtohrli);
  return static_cast<int>(instance->nsim_channel_window);
}

void SetFreqNormOrder(Zimtohrli zimtohrli, float f) {
static_cast<zimtohrli::Zimtohrli*>(zimtohrli)->freq_norm_order = f;
// Writes `s` into `nsim_channel_window` of the zimtohrli::Zimtohrli instance
// that the handle points at.
// NOTE(review): `s` goes through static_cast<size_t>, which turns a negative
// int into a very large unsigned value - confirm callers only pass
// non-negative windows.
void SetNSIMChannelWindow(Zimtohrli zimtohrli, int s) {
  auto* const instance = static_cast<zimtohrli::Zimtohrli*>(zimtohrli);
  instance->nsim_channel_window = static_cast<size_t>(s);
}

float GetPerceptualSampleRate(Zimtohrli zimtohrli) {
Expand Down
14 changes: 8 additions & 6 deletions cpp/zimt/mos.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,21 @@

namespace zimtohrli {

const std::array<float, 4> params = {3.439e+00, -4.138e-02, 3.008e+00,
-1.354e-01};

namespace {

float sigmoid(float x) { return 1 / (1 + std::exp(-x)); }
// Coefficients of the curve evaluated by sigmoid(), in the order
// {numerator, offset added in the denominator, scale applied to x inside exp}.
const std::array<float, 3> params = {1.000e+00, 7.451e-09, 2.943e+00};

// Evaluates params[0] / (params[1] + exp(params[2] * x)).
// With the coefficients above the value is 1 at x == 0 (in float precision)
// and shrinks as x grows.
float sigmoid(float x) {
  const float scaled_input = params[2] * x;
  const float denominator = params[1] + std::exp(scaled_input);
  return params[0] / denominator;
}

// Reciprocal of sigmoid(0), so that sigmoid(x) * zero_crossing_reciprocal
// equals 1 at x == 0.
const float zero_crossing_reciprocal = 1.0 / sigmoid(0.0f);

} // namespace

// Optimized using `mos_mapping.ipynb`.
float MOSFromZimtohrli(float zimtohrli_distance) {
return 1 + 2 * (sigmoid(params[0] + params[1] * zimtohrli_distance) +
sigmoid(params[2] + params[3] * zimtohrli_distance));
return 1.0 + 4.0 * sigmoid(zimtohrli_distance) * zero_crossing_reciprocal;
}

} // namespace zimtohrli
6 changes: 3 additions & 3 deletions cpp/zimt/mos_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ namespace zimtohrli {
namespace {

TEST(MOS, MOSFromZimtohrli) {
const std::vector<float> zimt_scores = {5, 20, 40, 80};
const std::vector<float> mos = {4.746790024702545, 4.01181593706087,
2.8773086764995064, 2.0648331964917945};
const std::vector<float> zimt_scores = {0, 0.1, 0.5, 0.7, 1.0};
const std::vector<float> mos = {5.0, 3.9802114963531494, 1.9183233976364136,
1.5097649097442627, 1.210829496383667};
for (size_t index = 0; index < zimt_scores.size(); ++index) {
ASSERT_NEAR(MOSFromZimtohrli(zimt_scores[index]), mos[index], 1e-2);
}
Expand Down
Loading

0 comments on commit e92ee29

Please sign in to comment.