Skip to content

Commit

Permalink
feat: fix up #875 about MARGIN_WIDTH and FEATURE_SIZE (#880)
Browse files (browse the repository at this point in the history)
#875 にて `extern "C" fn generate_full_intermediate`の中で定義された二つ
の定数について、以下の変更を加える。

`MARGIN_WIDTH`: Rust APIのものを持って来るようにすることで重複を避ける
`FEATURE_SIZE`: 出力サイズのチェック時に失敗したとき、`ncols`が
                `FEATURE_SIZE`と異なるならパニックメッセージを別のものに
                する

Refs: #875
  • Loading branch information
qryxip authored Dec 1, 2024
1 parent 1d24929 commit 32d7aeb
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
5 changes: 4 additions & 1 deletion crates/voicevox_core/src/__internal/interop.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
pub mod raii;

-pub use crate::{metas::merge as merge_metas, synthesizer::blocking::PerformInference};
+pub use crate::{
+metas::merge as merge_metas,
+synthesizer::{blocking::PerformInference, MARGIN},
+};
4 changes: 3 additions & 1 deletion crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::asyncs::{Async, BlockingThreadPool, SingleTasked};

+pub use self::inner::MARGIN;
+
/// [`blocking::Synthesizer::synthesis`]および[`nonblocking::Synthesizer::synthesis`]のオプション。
///
/// [`blocking::Synthesizer::synthesis`]: blocking::Synthesizer::synthesis
Expand Down Expand Up @@ -132,7 +134,7 @@ mod inner {
const PADDING_FRAME_LENGTH: usize = 38; // (0.4秒 * 24000Hz / 256.0).round()
/// 音声生成の際、音声特徴量の前後に確保すべきマージン幅(フレーム数)
/// モデルの受容野から計算される
-const MARGIN: usize = 14;
+pub const MARGIN: usize = 14;
/// 指定した音声区間に対応する特徴量を両端にマージンを追加した上で切り出す
fn crop_with_margin(audio: &AudioFeature, range: Range<usize>) -> ndarray::ArrayView2<'_, f32> {
if range.start > audio.frame_length || range.end > audio.frame_length {
Expand Down
10 changes: 7 additions & 3 deletions crates/voicevox_core_c_api/src/compatible_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,15 +374,15 @@ pub unsafe extern "C" fn generate_full_intermediate(
speaker_id: *mut i64,
output: *mut f32,
) -> bool {
+use voicevox_core::__internal::interop::MARGIN as MARGIN_WIDTH;
+const FEATURE_SIZE: usize = 80;
init_logger_once();
assert_aligned(f0);
assert_aligned(phoneme);
assert_aligned(speaker_id);
assert_aligned(output);
let length = length as usize;
let phoneme_size = phoneme_size as usize;
-const MARGIN_WIDTH: usize = 14;
-const FEATURE_SIZE: usize = 80;
let synthesizer = &*lock_synthesizer();
let result = ensure_initialized!(synthesizer).generate_full_intermediate(
length,
Expand All @@ -396,7 +396,11 @@ pub unsafe extern "C" fn generate_full_intermediate(
Ok(output_arr) => {
let output_len = (length + 2 * MARGIN_WIDTH) * FEATURE_SIZE;
if output_arr.len() != output_len {
-panic!("expected {}, got {}", output_len, output_arr.len());
+if output_arr.ncols() != FEATURE_SIZE {
+panic!("the feature size is expected to be {FEATURE_SIZE}");
+} else {
+panic!("expected {}, got {}", output_len, output_arr.len());
+}
}
let output_arr = output_arr.as_standard_layout();
// SAFETY: The safety contract must be upheld by the caller.
Expand Down

0 comments on commit 32d7aeb

Please sign in to comment.